From 5533a327eb0f526cbebbe71124620fcbb0bc0649 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 22 Nov 2023 16:12:37 +0100 Subject: [PATCH 01/66] encoding/cbor: initial package implementation --- core/encoding/base64/base64.odin | 128 +-- core/encoding/cbor/cbor.odin | 680 +++++++++++++++ core/encoding/cbor/coding.odin | 825 ++++++++++++++++++ core/encoding/cbor/marshal.odin | 541 ++++++++++++ core/encoding/cbor/tags.odin | 361 ++++++++ core/encoding/cbor/unmarshal.odin | 832 +++++++++++++++++++ core/io/io.odin | 25 +- core/net/common.odin | 3 +- examples/all/all_main.odin | 2 + tests/core/Makefile | 1 + tests/core/build.bat | 1 + tests/core/encoding/cbor/test_core_cbor.odin | 719 ++++++++++++++++ 12 files changed, 4067 insertions(+), 51 deletions(-) create mode 100644 core/encoding/cbor/cbor.odin create mode 100644 core/encoding/cbor/coding.odin create mode 100644 core/encoding/cbor/marshal.odin create mode 100644 core/encoding/cbor/tags.odin create mode 100644 core/encoding/cbor/unmarshal.odin create mode 100644 tests/core/encoding/cbor/test_core_cbor.odin diff --git a/core/encoding/base64/base64.odin b/core/encoding/base64/base64.odin index cf2ea1c12..793f22c57 100644 --- a/core/encoding/base64/base64.odin +++ b/core/encoding/base64/base64.odin @@ -1,5 +1,9 @@ package base64 +import "core:io" +import "core:mem" +import "core:strings" + // @note(zh): Encoding utility for Base64 // A secondary param can be used to supply a custom alphabet to // @link(encode) and a matching decoding table to @link(decode). 
@@ -39,59 +43,85 @@ DEC_TABLE := [128]int { 49, 50, 51, -1, -1, -1, -1, -1, } -encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> string #no_bounds_check { - length := len(data) - if length == 0 { - return "" - } +encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> (encoded: string, err: mem.Allocator_Error) #optional_allocator_error { + out_length := encoded_length(data) + if out_length == 0 { + return + } - out_length := ((4 * length / 3) + 3) &~ 3 - out := make([]byte, out_length, allocator) + out: strings.Builder + strings.builder_init(&out, 0, out_length, allocator) or_return - c0, c1, c2, block: int + ioerr := encode_into(strings.to_stream(&out), data, ENC_TBL) + assert(ioerr == nil) - for i, d := 0, 0; i < length; i, d = i + 3, d + 4 { - c0, c1, c2 = int(data[i]), -1, -1 - - if i + 1 < length { c1 = int(data[i + 1]) } - if i + 2 < length { c2 = int(data[i + 2]) } - - block = (c0 << 16) | (max(c1, 0) << 8) | max(c2, 0) - - out[d] = ENC_TBL[block >> 18 & 63] - out[d + 1] = ENC_TBL[block >> 12 & 63] - out[d + 2] = c1 == -1 ? PADDING : ENC_TBL[block >> 6 & 63] - out[d + 3] = c2 == -1 ? PADDING : ENC_TBL[block & 63] - } - return string(out) + return strings.to_string(out), nil } -decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> []byte #no_bounds_check { - length := len(data) - if length == 0 { - return nil - } +encoded_length :: #force_inline proc(data: []byte) -> int { + length := len(data) + if length == 0 { + return 0 + } - pad_count := data[length - 1] == PADDING ? (data[length - 2] == PADDING ? 
2 : 1) : 0 - out_length := ((length * 6) >> 3) - pad_count - out := make([]byte, out_length, allocator) - - c0, c1, c2, c3: int - b0, b1, b2: int - - for i, j := 0, 0; i < length; i, j = i + 4, j + 3 { - c0 = DEC_TBL[data[i]] - c1 = DEC_TBL[data[i + 1]] - c2 = DEC_TBL[data[i + 2]] - c3 = DEC_TBL[data[i + 3]] - - b0 = (c0 << 2) | (c1 >> 4) - b1 = (c1 << 4) | (c2 >> 2) - b2 = (c2 << 6) | c3 - - out[j] = byte(b0) - out[j + 1] = byte(b1) - out[j + 2] = byte(b2) - } - return out + return ((4 * length / 3) + 3) &~ 3 +} + +encode_into :: proc(w: io.Writer, data: []byte, ENC_TBL := ENC_TABLE) -> (err: io.Error) #no_bounds_check { + length := len(data) + if length == 0 { + return + } + + c0, c1, c2, block: int + + for i, d := 0, 0; i < length; i, d = i + 3, d + 4 { + c0, c1, c2 = int(data[i]), -1, -1 + + if i + 1 < length { c1 = int(data[i + 1]) } + if i + 2 < length { c2 = int(data[i + 2]) } + + block = (c0 << 16) | (max(c1, 0) << 8) | max(c2, 0) + + out: [4]byte + out[0] = ENC_TBL[block >> 18 & 63] + out[1] = ENC_TBL[block >> 12 & 63] + out[2] = c1 == -1 ? PADDING : ENC_TBL[block >> 6 & 63] + out[3] = c2 == -1 ? PADDING : ENC_TBL[block & 63] + + #bounds_check { io.write_full(w, out[:]) or_return } + } + return +} + +decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> (out: []byte, err: mem.Allocator_Error) #optional_allocator_error { + #no_bounds_check { + length := len(data) + if length == 0 { + return + } + + pad_count := data[length - 1] == PADDING ? (data[length - 2] == PADDING ? 
2 : 1) : 0 + out_length := ((length * 6) >> 3) - pad_count + out = make([]byte, out_length, allocator) or_return + + c0, c1, c2, c3: int + b0, b1, b2: int + + for i, j := 0, 0; i < length; i, j = i + 4, j + 3 { + c0 = DEC_TBL[data[i]] + c1 = DEC_TBL[data[i + 1]] + c2 = DEC_TBL[data[i + 2]] + c3 = DEC_TBL[data[i + 3]] + + b0 = (c0 << 2) | (c1 >> 4) + b1 = (c1 << 4) | (c2 >> 2) + b2 = (c2 << 6) | c3 + + out[j] = byte(b0) + out[j + 1] = byte(b1) + out[j + 2] = byte(b2) + } + return + } } diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin new file mode 100644 index 000000000..e91c53f3c --- /dev/null +++ b/core/encoding/cbor/cbor.odin @@ -0,0 +1,680 @@ +package cbor + +import "core:encoding/json" +import "core:intrinsics" +import "core:io" +import "core:mem" +import "core:runtime" +import "core:strconv" +import "core:strings" + +// If we are decoding a stream of either a map or list, the initial capacity will be this value. +INITIAL_STREAMED_CONTAINER_CAPACITY :: 8 +// If we are decoding a stream of either text or bytes, the initial capacity will be this value. +INITIAL_STREAMED_BYTES_CAPACITY :: 16 + +// Known/common headers are defined, undefined headers can still be valid. +// Higher 3 bits is for the major type and lower 5 bits for the additional information. 
+Header :: enum u8 { + U8 = (u8(Major.Unsigned) << 5) | u8(Add.One_Byte), + U16 = (u8(Major.Unsigned) << 5) | u8(Add.Two_Bytes), + U32 = (u8(Major.Unsigned) << 5) | u8(Add.Four_Bytes), + U64 = (u8(Major.Unsigned) << 5) | u8(Add.Eight_Bytes), + + Neg_U8 = (u8(Major.Negative) << 5) | u8(Add.One_Byte), + Neg_U16 = (u8(Major.Negative) << 5) | u8(Add.Two_Bytes), + Neg_U32 = (u8(Major.Negative) << 5) | u8(Add.Four_Bytes), + Neg_U64 = (u8(Major.Negative) << 5) | u8(Add.Eight_Bytes), + + False = (u8(Major.Other) << 5) | u8(Add.False), + True = (u8(Major.Other) << 5) | u8(Add.True), + + Nil = (u8(Major.Other) << 5) | u8(Add.Nil), + Undefined = (u8(Major.Other) << 5) | u8(Add.Undefined), + + Simple = (u8(Major.Other) << 5) | u8(Add.One_Byte), + + F16 = (u8(Major.Other) << 5) | u8(Add.Two_Bytes), + F32 = (u8(Major.Other) << 5) | u8(Add.Four_Bytes), + F64 = (u8(Major.Other) << 5) | u8(Add.Eight_Bytes), + + Break = (u8(Major.Other) << 5) | u8(Add.Break), +} + +// The higher 3 bits of the header which denotes what type of value it is. +Major :: enum u8 { + Unsigned, + Negative, + Bytes, + Text, + Array, + Map, + Tag, + Other, +} + +// The lower 5 bits of the header which denotes additional information for the type of value. +Add :: enum u8 { + False = 20, + True = 21, + Nil = 22, + Undefined = 23, + + One_Byte = 24, + Two_Bytes = 25, + Four_Bytes = 26, + Eight_Bytes = 27, + + Length_Unknown = 31, + Break = Length_Unknown, +} + +Value :: union { + u8, + u16, + u32, + u64, + + Negative_U8, + Negative_U16, + Negative_U32, + Negative_U64, + + // Pointers so the size of the Value union stays small. + ^Bytes, + ^Text, + ^Array, + ^Map, + ^Tag, + + Simple, + f16, + f32, + f64, + bool, + Undefined, + Nil, +} + +Bytes :: []byte +Text :: string + +Array :: []Value + +Map :: []Map_Entry +Map_Entry :: struct { + key: Value, // Can be any unsigned, negative, float, Simple, bool, Text. + value: Value, +} + +Tag :: struct { + number: Tag_Number, + value: Value, // Value based on the number. 
+} + +Tag_Number :: u64 + +Nil :: distinct rawptr +Undefined :: distinct rawptr + +// A distinct atom-like number, range from `0..=19` and `32..=max(u8)`. +Simple :: distinct u8 +Atom :: Simple + +Unmarshal_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Decode_Data_Error, + Unmarshal_Data_Error, + Maybe(Unsupported_Type_Error), +} + +Marshal_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Encode_Data_Error, + Marshal_Data_Error, + Maybe(Unsupported_Type_Error), +} + +Decode_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Decode_Data_Error, +} + +Encode_Error :: union #shared_nil { + io.Error, + mem.Allocator_Error, + Encode_Data_Error, +} + +Decode_Data_Error :: enum { + None, + Bad_Major, // An invalid major type was encountered. + Bad_Argument, // A general unexpected value (most likely invalid additional info in header). + Bad_Tag_Value, // When the type of value for the given tag is not valid. + Nested_Indefinite_Length, // When an streamed/indefinite length container nests another, this is not allowed. + Nested_Tag, // When a tag's value is another tag, this is not allowed. + Length_Too_Big, // When the length of a container (map, array, bytes, string) is more than `max(int)`. + Break, +} + +Encode_Data_Error :: enum { + None, + Invalid_Simple, // When a simple is being encoded that is out of the range `0..=19` and `32..=max(u8)`. + Int_Too_Big, // When an int is being encoded that is larger than `max(u64)` or smaller than `min(u64)`. + Bad_Tag_Value, // When the type of value is not supported by the tag implementation. +} + +Unmarshal_Data_Error :: enum { + None, + Invalid_Parameter, // When the given `any` can not be unmarshalled into. + Non_Pointer_Parameter, // When the given `any` is not a pointer. +} + +Marshal_Data_Error :: enum { + None, + Invalid_CBOR_Tag, // When the struct tag `cbor_tag:""` is not a registered name or number. 
+} + +// Error that is returned when a type couldn't be marshalled into or out of, as much information +// as possible/available is added. +Unsupported_Type_Error :: struct { + id: typeid, + hdr: Header, + add: Add, +} + +_unsupported :: proc(v: any, hdr: Header, add: Add = nil) -> Maybe(Unsupported_Type_Error) { + return Unsupported_Type_Error{ + id = v.id, + hdr = hdr, + add = add, + } +} + +// Actual value is `-1 - x` (be careful of overflows). + +Negative_U8 :: distinct u8 +Negative_U16 :: distinct u16 +Negative_U32 :: distinct u32 +Negative_U64 :: distinct u64 + +// Turns the CBOR negative unsigned int type into a signed integer type. +negative_to_int :: proc { + negative_u8_to_int, + negative_u16_to_int, + negative_u32_to_int, + negative_u64_to_int, +} + +negative_u8_to_int :: #force_inline proc(u: Negative_U8) -> i16 { + return -1 - i16(u) +} + +negative_u16_to_int :: #force_inline proc(u: Negative_U16) -> i32 { + return -1 - i32(u) +} + +negative_u32_to_int :: #force_inline proc(u: Negative_U32) -> i64 { + return -1 - i64(u) +} + +negative_u64_to_int :: #force_inline proc(u: Negative_U64) -> i128 { + return -1 - i128(u) +} + +// Utility for converting between the different errors when they are subsets of the other. 
+err_conv :: proc { + encode_to_marshal_err, + decode_to_unmarshal_err, + decode_to_unmarshal_err_p, + decode_to_unmarshal_err_p2, +} + +encode_to_marshal_err :: #force_inline proc(err: Encode_Error) -> Marshal_Error { + switch e in err { + case nil: return nil + case io.Error: return e + case mem.Allocator_Error: return e + case Encode_Data_Error: return e + case: return nil + } +} + +decode_to_unmarshal_err :: #force_inline proc(err: Decode_Error) -> Unmarshal_Error { + switch e in err { + case nil: return nil + case io.Error: return e + case mem.Allocator_Error: return e + case Decode_Data_Error: return e + case: return nil + } +} + +decode_to_unmarshal_err_p :: #force_inline proc(v: $T, err: Decode_Error) -> (T, Unmarshal_Error) { + return v, err_conv(err) +} + +decode_to_unmarshal_err_p2 :: #force_inline proc(v: $T, v2: $T2, err: Decode_Error) -> (T, T2, Unmarshal_Error) { + return v, v2, err_conv(err) +} + +// Recursively frees all memory allocated when decoding the passed value. +destroy :: proc(val: Value, allocator := context.allocator) { + context.allocator = allocator + #partial switch v in val { + case ^Map: + if v == nil { return } + for entry in v { + destroy(entry.key) + destroy(entry.value) + } + delete(v^) + free(v) + case ^Array: + if v == nil { return } + for entry in v { + destroy(entry) + } + delete(v^) + free(v) + case ^Text: + if v == nil { return } + delete(v^) + free(v) + case ^Bytes: + if v == nil { return } + delete(v^) + free(v) + case ^Tag: + if v == nil { return } + destroy(v.value) + free(v) + } +} + +/* +diagnose either writes or returns a human-readable representation of the value, +optionally formatted, defined as the diagnostic format in section 8 of RFC 8949. + +Incidentally, if the CBOR does not contain any of the additional types defined on top of JSON +this will also be valid JSON. +*/ +diagnose :: proc { + diagnostic_string, + diagnose_to_writer, +} + +// Turns the given CBOR value into a human-readable string. 
+// See docs on the proc group `diagnose` for more info. +diagnostic_string :: proc(val: Value, padding := 0, allocator := context.allocator) -> (string, mem.Allocator_Error) #optional_allocator_error { + b := strings.builder_make(allocator) + w := strings.to_stream(&b) + err := diagnose_to_writer(w, val, padding) + if err == .EOF { + // The string builder stream only returns .EOF, and only if it can't write (out of memory). + return "", .Out_Of_Memory + } + assert(err == nil) + + return strings.to_string(b), nil +} + +// Writes the given CBOR value into the writer as human-readable text. +// See docs on the proc group `diagnose` for more info. +diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { + @(require_results) + indent :: proc(padding: int) -> int { + padding := padding + if padding != -1 { + padding += 1 + } + return padding + } + + @(require_results) + dedent :: proc(padding: int) -> int { + padding := padding + if padding != -1 { + padding -= 1 + } + return padding + } + + comma :: proc(w: io.Writer, padding: int) -> io.Error { + _ = io.write_string(w, ", " if padding == -1 else ",") or_return + return nil + } + + newline :: proc(w: io.Writer, padding: int) -> io.Error { + if padding != -1 { + io.write_string(w, "\n") or_return + for _ in 0.. (Value, mem.Allocator_Error) #optional_allocator_error { + internal :: proc(val: json.Value) -> (ret: Value, err: mem.Allocator_Error) { + switch v in val { + case json.Null: return Nil{}, nil + case json.Integer: + i, major := _int_to_uint(v) + #partial switch major { + case .Unsigned: return i, nil + case .Negative: return Negative_U64(i), nil + case: unreachable() + } + case json.Float: return v, nil + case json.Boolean: return v, nil + case json.String: + container := new(Text) or_return + + // We need the string to have a nil byte at the end so we clone to cstring. 
+ container^ = string(strings.clone_to_cstring(v) or_return) + return container, nil + case json.Array: + arr := new(Array) or_return + arr^ = make([]Value, len(v)) or_return + for _, i in arr { + arr[i] = internal(v[i]) or_return + } + return arr, nil + case json.Object: + m := new(Map) or_return + dm := make([dynamic]Map_Entry, 0, len(v)) or_return + for mkey, mval in v { + append(&dm, Map_Entry{from_json(mkey) or_return, from_json(mval) or_return}) + } + m^ = dm[:] + return m, nil + } + return nil, nil + } + + context.allocator = allocator + return internal(val) +} + +/* +Converts from CBOR to JSON. + +NOTE: overflow on integers or floats is not handled. + +Everything is copied to the given allocator, the passed in CBOR value can be `destroy`'ed after. + +If a CBOR map with non-string keys is encountered it is turned into an array of tuples. +*/ +to_json :: proc(val: Value, allocator := context.allocator) -> (json.Value, mem.Allocator_Error) #optional_allocator_error { + internal :: proc(val: Value) -> (ret: json.Value, err: mem.Allocator_Error) { + switch v in val { + case Simple: return json.Integer(v), nil + + case u8: return json.Integer(v), nil + case u16: return json.Integer(v), nil + case u32: return json.Integer(v), nil + case u64: return json.Integer(v), nil + + case Negative_U8: return json.Integer(negative_to_int(v)), nil + case Negative_U16: return json.Integer(negative_to_int(v)), nil + case Negative_U32: return json.Integer(negative_to_int(v)), nil + case Negative_U64: return json.Integer(negative_to_int(v)), nil + + case f16: return json.Float(v), nil + case f32: return json.Float(v), nil + case f64: return json.Float(v), nil + + case bool: return json.Boolean(v), nil + + case Undefined: return json.Null{}, nil + case Nil: return json.Null{}, nil + + case ^Bytes: return json.String(strings.clone(string(v^)) or_return), nil + case ^Text: return json.String(strings.clone(v^) or_return), nil + + case ^Map: + keys_all_strings :: proc(m: ^Map) -> bool 
{ + for entry in m { + #partial switch kv in entry.key { + case ^Bytes: + case ^Text: + case: return false + } + } + // No non-string key was found, so the map can be represented as a JSON object. + return true + } + + if keys_all_strings(v) { + obj := make(json.Object, len(v)) or_return + for entry in v { + k: string + #partial switch kv in entry.key { + case ^Bytes: k = string(kv^) + case ^Text: k = kv^ + case: unreachable() + } + + v := internal(entry.value) or_return + // Clone the key so the JSON object does not alias memory owned by the CBOR value. + obj[strings.clone(k) or_return] = v + } + return obj, nil + } else { + // Resort to an array of tuples if keys aren't all strings. + arr := make(json.Array, 0, len(v)) or_return + for entry in v { + entry_arr := make(json.Array, 0, 2) or_return + append(&entry_arr, internal(entry.key) or_return) or_return + append(&entry_arr, internal(entry.value) or_return) or_return + append(&arr, entry_arr) or_return + } + return arr, nil + } + + case ^Array: + arr := make(json.Array, 0, len(v)) or_return + for entry in v { + append(&arr, internal(entry) or_return) or_return + } + return arr, nil + + case ^Tag: + obj := make(json.Object, 2) or_return + obj[strings.clone("number") or_return] = internal(v.number) or_return + obj[strings.clone("value") or_return] = internal(v.value) or_return + return obj, nil + + case: return json.Null{}, nil + } + } + + context.allocator = allocator + return internal(val) +} + +_int_to_uint :: proc { + _i8_to_uint, + _i16_to_uint, + _i32_to_uint, + _i64_to_uint, + _i128_to_uint, +} + +_u128_to_u64 :: #force_inline proc(v: u128) -> (u64, Encode_Data_Error) { + if v > u128(max(u64)) { + return 0, .Int_Too_Big + } + + return u64(v), nil +} + +_i8_to_uint :: #force_inline proc(v: i8) -> (u: u8, m: Major) { + if v < 0 { + return u8(abs(v)-1), .Negative + } + + return u8(v), .Unsigned +} + +_i16_to_uint :: #force_inline proc(v: i16) -> (u: u16, m: Major) { + if v < 0 { + return u16(abs(v)-1), .Negative + } + + return u16(v), .Unsigned +} + +_i32_to_uint :: #force_inline proc(v: i32) -> (u: u32, m: Major) { + if v < 0 { + return u32(abs(v)-1), .Negative + } + + return u32(v), .Unsigned +} 
+ +_i64_to_uint :: #force_inline proc(v: i64) -> (u: u64, m: Major) { + if v < 0 { + return u64(abs(v)-1), .Negative + } + + return u64(v), .Unsigned +} + +_i128_to_uint :: proc(v: i128) -> (u: u64, m: Major, err: Encode_Data_Error) { + if v < 0 { + m = .Negative + u, err = _u128_to_u64(u128(abs(v) - 1)) + return + } + + m = .Unsigned + u, err = _u128_to_u64(u128(v)) + return +} + +@(private) +is_bit_set_different_endian_to_platform :: proc(ti: ^runtime.Type_Info) -> bool { + if ti == nil { + return false + } + t := runtime.type_info_base(ti) + #partial switch info in t.variant { + case runtime.Type_Info_Integer: + switch info.endianness { + case .Platform: return false + case .Little: return ODIN_ENDIAN != .Little + case .Big: return ODIN_ENDIAN != .Big + } + } + return false +} + diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin new file mode 100644 index 000000000..5c14d8f87 --- /dev/null +++ b/core/encoding/cbor/coding.odin @@ -0,0 +1,825 @@ +package cbor + +import "core:bytes" +import "core:encoding/endian" +import "core:intrinsics" +import "core:io" +import "core:slice" +import "core:strings" + +Encoder_Flag :: enum { + // CBOR defines a tag header that also acts as a file/binary header, + // this way decoders can check the first header of the binary and see if it is CBOR. + Self_Described_CBOR, + + // Integers are stored in the smallest integer type it fits. + // This involves checking each int against the max of all its smaller types. + Deterministic_Int_Size, + + // Floats are stored in the smallest size float type without losing precision. + // This involves casting each float down to its smaller types and checking if it changed. + Deterministic_Float_Size, + + // Sort maps by their keys in bytewise lexicographic order of their deterministic encoding. 
+ // NOTE: In order to do this, all keys of a map have to be pre-computed, sorted, and + // then written, this involves temporary allocations for the keys and a copy of the map itself. + Deterministic_Map_Sorting, + + // Internal flag to do initialization. + _In_Progress, +} + +Encoder_Flags :: bit_set[Encoder_Flag] + +// Flags for fully deterministic output (if you are not using streaming/indeterminate length). +ENCODE_FULLY_DETERMINISTIC :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size, .Deterministic_Map_Sorting} +// Flags for the smallest encoding output. +ENCODE_SMALL :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size} +// Flags for the fastest encoding output. +ENCODE_FAST :: Encoder_Flags{} + +Encoder :: struct { + flags: Encoder_Flags, + writer: io.Writer, +} + +/* +Decodes both deterministic and non-deterministic CBOR into a `Value` variant. + +`Text` and `Bytes` can safely be cast to cstrings because of an added 0 byte. + +Allocations are done using the given allocator, +*no* allocations are done on the `context.temp_allocator`. + +A value can be (fully and recursively) deallocated using the `destroy` proc in this package. +*/ +decode :: proc { + decode_string, + decode_reader, +} + +// Decodes the given string as CBOR. +// See docs on the proc group `decode` for more information. +decode_string :: proc(s: string, allocator := context.allocator) -> (v: Value, err: Decode_Error) { + context.allocator = allocator + + r: strings.Reader + strings.reader_init(&r, s) + return decode(strings.reader_to_stream(&r), allocator=allocator) +} + +// Reads a CBOR value from the given reader. +// See docs on the proc group `decode` for more information. 
+decode_reader :: proc(r: io.Reader, hdr: Header = Header(0), allocator := context.allocator) -> (v: Value, err: Decode_Error) { + context.allocator = allocator + + hdr := hdr + if hdr == Header(0) { hdr = _decode_header(r) or_return } + switch hdr { + case .U8: return _decode_u8 (r) + case .U16: return _decode_u16(r) + case .U32: return _decode_u32(r) + case .U64: return _decode_u64(r) + + case .Neg_U8: return Negative_U8 (_decode_u8 (r) or_return), nil + case .Neg_U16: return Negative_U16(_decode_u16(r) or_return), nil + case .Neg_U32: return Negative_U32(_decode_u32(r) or_return), nil + case .Neg_U64: return Negative_U64(_decode_u64(r) or_return), nil + + case .Simple: return _decode_simple(r) + + case .F16: return _decode_f16(r) + case .F32: return _decode_f32(r) + case .F64: return _decode_f64(r) + + case .True: return true, nil + case .False: return false, nil + + case .Nil: return Nil{}, nil + case .Undefined: return Undefined{}, nil + + case .Break: return nil, .Break + } + + maj, add := _header_split(hdr) + switch maj { + case .Unsigned: return _decode_tiny_u8(add) + case .Negative: return Negative_U8(_decode_tiny_u8(add) or_return), nil + case .Bytes: return _decode_bytes_ptr(r, add) + case .Text: return _decode_text_ptr(r, add) + case .Array: return _decode_array_ptr(r, add) + case .Map: return _decode_map_ptr(r, add) + case .Tag: return _decode_tag_ptr(r, add) + case .Other: return _decode_tiny_simple(add) + case: return nil, .Bad_Major + } +} + +/* +Encodes the CBOR value into a binary CBOR. + +Flags can be used to control the output (mainly determinism, which coincidently affects size). + +The default flags `ENCODE_SMALL` (`.Deterministic_Int_Size`, `.Deterministic_Float_Size`) will try +to put ints and floats into their smallest possible byte size without losing equality. + +Adding the `.Self_Described_CBOR` flag will wrap the value in a tag that lets generic decoders know +the contents are CBOR from just reading the first byte. 
+ +Adding the `.Deterministic_Map_Sorting` flag will sort the encoded maps by the byte content of the +encoded key. This flag has a cost on performance and memory efficiency because all keys in a map +have to be precomputed, sorted and only then written to the output. + +Empty flags will do nothing extra to the value. + +The allocations for the `.Deterministic_Map_Sorting` flag are done using the `context.temp_allocator` +but are followed by the necessary `delete` and `free` calls if the allocator supports them. +This is helpful when the CBOR size is so big that you don't want to collect all the temporary +allocations until the end. +*/ +encode_into :: proc { + encode_into_bytes, + encode_into_builder, + encode_into_writer, + encode_into_encoder, +} +encode :: encode_into + +// Encodes the CBOR value into binary CBOR allocated on the given allocator. +// See the docs on the proc group `encode_into` for more info. +encode_into_bytes :: proc(v: Value, flags := ENCODE_SMALL, allocator := context.allocator) -> (data: []byte, err: Encode_Error) { + b := strings.builder_make(allocator) or_return + // Free the partially written buffer if encoding fails, no data is handed to the caller in that case. + defer if err != nil { strings.builder_destroy(&b) } + + encode_into_builder(&b, v, flags) or_return + return b.buf[:], nil +} + +// Encodes the CBOR value into binary CBOR written to the given builder. +// See the docs on the proc group `encode_into` for more info. +encode_into_builder :: proc(b: ^strings.Builder, v: Value, flags := ENCODE_SMALL) -> Encode_Error { + return encode_into_writer(strings.to_stream(b), v, flags) +} + +// Encodes the CBOR value into binary CBOR written to the given writer. +// See the docs on the proc group `encode_into` for more info. +encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Encode_Error { + return encode_into_encoder(Encoder{flags, w}, v) +} + +// Encodes the CBOR value into binary CBOR written to the given encoder. +// See the docs on the proc group `encode_into` for more info. 
+encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { + e := e + + outer: bool + defer if outer { + e.flags &~= {._In_Progress} + } + + if ._In_Progress not_in e.flags { + outer = true + e.flags |= {._In_Progress} + + if .Self_Described_CBOR in e.flags { + _encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return + } + } + + switch v_spec in v { + case u8: return _encode_u8(e.writer, v_spec, .Unsigned) + case u16: return _encode_u16(e, v_spec, .Unsigned) + case u32: return _encode_u32(e, v_spec, .Unsigned) + case u64: return _encode_u64(e, v_spec, .Unsigned) + case Negative_U8: return _encode_u8(e.writer, u8(v_spec), .Negative) + case Negative_U16: return _encode_u16(e, u16(v_spec), .Negative) + case Negative_U32: return _encode_u32(e, u32(v_spec), .Negative) + case Negative_U64: return _encode_u64(e, u64(v_spec), .Negative) + case ^Bytes: return _encode_bytes(e, v_spec^) + case ^Text: return _encode_text(e, v_spec^) + case ^Array: return _encode_array(e, v_spec^) + case ^Map: return _encode_map(e, v_spec^) + case ^Tag: return _encode_tag(e, v_spec^) + case Simple: return _encode_simple(e.writer, v_spec) + case f16: return _encode_f16(e.writer, v_spec) + case f32: return _encode_f32(e, v_spec) + case f64: return _encode_f64(e, v_spec) + case bool: return _encode_bool(e.writer, v_spec) + case Nil: return _encode_nil(e.writer) + case Undefined: return _encode_undefined(e.writer) + case: return nil + } +} + +_decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { + buf: [1]byte + io.read_full(r, buf[:]) or_return + return Header(buf[0]), nil +} + +_header_split :: proc(hdr: Header) -> (Major, Add) { + return Major(u8(hdr) >> 5), Add(u8(hdr) & 0x1f) +} + +_decode_u8 :: proc(r: io.Reader) -> (v: u8, err: io.Error) { + byte: [1]byte + io.read_full(r, byte[:]) or_return + return byte[0], nil +} + +_encode_uint :: proc { + _encode_u8, + _encode_u16, + _encode_u32, + _encode_u64, +} + +_encode_u8 :: proc(w: io.Writer, v: u8, major: Major = 
.Unsigned) -> (err: io.Error) { + header := u8(major) << 5 + if v < u8(Add.One_Byte) { + header |= v + _, err = io.write_full(w, {header}) + return + } + + header |= u8(Add.One_Byte) + _, err = io.write_full(w, {header, v}) + return +} + +_decode_tiny_u8 :: proc(additional: Add) -> (u8, Decode_Data_Error) { + if intrinsics.expect(additional < .One_Byte, true) { + return u8(additional), nil + } + + return 0, .Bad_Argument +} + +_decode_u16 :: proc(r: io.Reader) -> (v: u16, err: io.Error) { + bytes: [2]byte + io.read_full(r, bytes[:]) or_return + return endian.unchecked_get_u16be(bytes[:]), nil +} + +_encode_u16 :: proc(e: Encoder, v: u16, major: Major = .Unsigned) -> Encode_Error { + if .Deterministic_Int_Size in e.flags { + return _encode_deterministic_uint(e.writer, v, major) + } + return _encode_u16_exact(e.writer, v, major) +} + +_encode_u16_exact :: proc(w: io.Writer, v: u16, major: Major = .Unsigned) -> (err: io.Error) { + bytes: [3]byte + bytes[0] = (u8(major) << 5) | u8(Add.Two_Bytes) + endian.unchecked_put_u16be(bytes[1:], v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_u32 :: proc(r: io.Reader) -> (v: u32, err: io.Error) { + bytes: [4]byte + io.read_full(r, bytes[:]) or_return + return endian.unchecked_get_u32be(bytes[:]), nil +} + +_encode_u32 :: proc(e: Encoder, v: u32, major: Major = .Unsigned) -> Encode_Error { + if .Deterministic_Int_Size in e.flags { + return _encode_deterministic_uint(e.writer, v, major) + } + return _encode_u32_exact(e.writer, v, major) +} + +_encode_u32_exact :: proc(w: io.Writer, v: u32, major: Major = .Unsigned) -> (err: io.Error) { + bytes: [5]byte + bytes[0] = (u8(major) << 5) | u8(Add.Four_Bytes) + endian.unchecked_put_u32be(bytes[1:], v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_u64 :: proc(r: io.Reader) -> (v: u64, err: io.Error) { + bytes: [8]byte + io.read_full(r, bytes[:]) or_return + return endian.unchecked_get_u64be(bytes[:]), nil +} + +_encode_u64 :: proc(e: Encoder, v: u64, major: 
Major = .Unsigned) -> Encode_Error { + if .Deterministic_Int_Size in e.flags { + return _encode_deterministic_uint(e.writer, v, major) + } + return _encode_u64_exact(e.writer, v, major) +} + +_encode_u64_exact :: proc(w: io.Writer, v: u64, major: Major = .Unsigned) -> (err: io.Error) { + bytes: [9]byte + bytes[0] = (u8(major) << 5) | u8(Add.Eight_Bytes) + endian.unchecked_put_u64be(bytes[1:], v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_bytes_ptr :: proc(r: io.Reader, add: Add, type: Major = .Bytes) -> (v: ^Bytes, err: Decode_Error) { + v = new(Bytes) or_return + defer if err != nil { free(v) } + + v^ = _decode_bytes(r, add, type) or_return + return +} + +_decode_bytes :: proc(r: io.Reader, add: Add, type: Major = .Bytes) -> (v: Bytes, err: Decode_Error) { + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + + n_items := _n_items.? or_else INITIAL_STREAMED_BYTES_CAPACITY + + if length_is_unknown { + buf: strings.Builder + buf.buf = make([dynamic]byte, 0, n_items) or_return + defer if err != nil { strings.builder_destroy(&buf) } + + buf_stream := strings.to_stream(&buf) + + for { + header := _decode_header(r) or_return + maj, add := _header_split(header) + + #partial switch maj { + case type: + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + if length_is_unknown { + return nil, .Nested_Indefinite_Length + } + n_items := i64(_n_items.?) + + copied := io.copy_n(buf_stream, r, n_items) or_return + assert(copied == n_items) + + case .Other: + if add != .Break { return nil, .Bad_Argument } + + // Write zero byte so this can be converted to cstring. + io.write_full(buf_stream, {0}) or_return + shrink(&buf.buf) // Ignoring error, this is not critical to succeed. + + // Slice only after the last write and shrink, both can move the backing memory. + v = buf.buf[:len(buf.buf)-1] // Take off zero byte. + return + + case: + return nil, .Bad_Major + } + } + } else { + v = make([]byte, n_items + 1) or_return // Space for the bytes and a zero byte. 
+ defer if err != nil { delete(v) } + + io.read_full(r, v[:n_items]) or_return + + v = v[:n_items] // Take off zero byte. + return + } +} + +_encode_bytes :: proc(e: Encoder, val: Bytes, major: Major = .Bytes) -> (err: Encode_Error) { + assert(len(val) >= 0) + _encode_u64(e, u64(len(val)), major) or_return + _, err = io.write_full(e.writer, val[:]) + return +} + +_decode_text_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Text, err: Decode_Error) { + v = new(Text) or_return + defer if err != nil { free(v) } + + v^ = _decode_text(r, add) or_return + return +} + +_decode_text :: proc(r: io.Reader, add: Add) -> (v: Text, err: Decode_Error) { + return (Text)(_decode_bytes(r, add, .Text) or_return), nil +} + +_encode_text :: proc(e: Encoder, val: Text) -> Encode_Error { + return _encode_bytes(e, transmute([]byte)val, .Text) +} + +_decode_array_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Array, err: Decode_Error) { + v = new(Array) or_return + defer if err != nil { free(v) } + + v^ = _decode_array(r, add) or_return + return +} + +_decode_array :: proc(r: io.Reader, add: Add) -> (v: Array, err: Decode_Error) { + _n_items, length_is_unknown := _decode_container_length(r, add) or_return + n_items := _n_items.? 
or_else INITIAL_STREAMED_CONTAINER_CAPACITY
+
+	array := make([dynamic]Value, 0, n_items) or_return
+	defer if err != nil {
+		for entry in array { destroy(entry) }
+		delete(array)
+	}
+
+	for i := 0; length_is_unknown || i < n_items; i += 1 {
+		val, verr := decode(r)
+		if length_is_unknown && verr == .Break {
+			break
+		} else if verr != nil {
+			err = verr
+			return
+		}
+
+		append(&array, val) or_return
+	}
+
+	shrink(&array)
+	v = array[:]
+	return
+}
+
+// Writes the array header (the length under the `.Array` major type) followed by every value.
+// Stops at, and returns, the first error; this includes a failure to write the header.
+_encode_array :: proc(e: Encoder, arr: Array) -> Encode_Error {
+	assert(len(arr) >= 0)
+	// The header write can fail like any other write, its error must not be dropped.
+	_encode_u64(e, u64(len(arr)), .Array) or_return
+	for val in arr {
+		encode(e, val) or_return
+	}
+	return nil
+}
+
+// Same as `_decode_map`, but the result is stored in a newly allocated `Map`.
+// The allocation is freed again when decoding fails.
+_decode_map_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Map, err: Decode_Error) {
+	v = new(Map) or_return
+	defer if err != nil { free(v) }
+
+	v^ = _decode_map(r, add) or_return
+	return
+}
+
+// Decodes key/value pairs into a `Map`.
+//
+// With a known length exactly that many entries are decoded, with an unknown length entries are
+// decoded until a key decodes as `.Break`.
+// On error every entry collected so far is destroyed and the entries are deleted.
+_decode_map :: proc(r: io.Reader, add: Add) -> (v: Map, err: Decode_Error) {
+	_n_items, length_is_unknown := _decode_container_length(r, add) or_return
+	n_items := _n_items.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY
+
+	items := make([dynamic]Map_Entry, 0, n_items) or_return
+	defer if err != nil {
+		for entry in items {
+			destroy(entry.key)
+			destroy(entry.value)
+		}
+		delete(items)
+	}
+
+	for i := 0; length_is_unknown || i < n_items; i += 1 {
+		key, kerr := decode(r)
+		if length_is_unknown && kerr == .Break {
+			break
+		} else if kerr != nil {
+			return nil, kerr
+		}
+
+		value, verr := decode(r)
+		if verr != nil {
+			// The key is not part of `items` yet, so the deferred cleanup would not reach it.
+			destroy(key)
+			return nil, verr
+		}
+
+		append(&items, Map_Entry{
+			key   = key,
+			value = value,
+		}) or_return
+	}
+
+	shrink(&items)
+	v = items[:]
+	return
+}
+
+_encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) {
+	assert(len(m) >= 0)
+	_encode_u64(e, u64(len(m)), .Map) or_return
+
+	if .Deterministic_Map_Sorting not_in e.flags {
+		for entry in m {
+			encode(e, entry.key) or_return
+			encode(e, entry.value) or_return
+		}
+		return
+	}
+
+	// Deterministic_Map_Sorting needs us to sort the entries by the byte contents of the
+	// encoded key.
+ // + // This means we have to store and sort them before writing incurring extra (temporary) allocations. + + Map_Entry_With_Key :: struct { + encoded_key: []byte, + entry: Map_Entry, + } + + entries := make([]Map_Entry_With_Key, len(m), context.temp_allocator) or_return + defer delete(entries, context.temp_allocator) + + for &entry, i in entries { + entry.entry = m[i] + + buf := strings.builder_make(0, 8, context.temp_allocator) or_return + + ke := e + ke.writer = strings.to_stream(&buf) + + encode(ke, entry.entry.key) or_return + entry.encoded_key = buf.buf[:] + } + + // Sort lexicographic on the bytes of the key. + slice.sort_by_cmp(entries, proc(a, b: Map_Entry_With_Key) -> slice.Ordering { + return slice.Ordering(bytes.compare(a.encoded_key, b.encoded_key)) + }) + + for entry in entries { + io.write_full(e.writer, entry.encoded_key) or_return + delete(entry.encoded_key, context.temp_allocator) + + encode(e, entry.entry.value) or_return + } + + return nil +} + +_decode_tag_ptr :: proc(r: io.Reader, add: Add) -> (v: Value, err: Decode_Error) { + tag := _decode_tag(r, add) or_return + if t, ok := tag.?; ok { + defer if err != nil { destroy(t.value) } + tp := new(Tag) or_return + tp^ = t + return tp, nil + } + + // no error, no tag, this was the self described CBOR tag, skip it. + return decode(r) +} + +_decode_tag :: proc(r: io.Reader, add: Add) -> (v: Maybe(Tag), err: Decode_Error) { + num := _decode_tag_nr(r, add) or_return + + // CBOR can be wrapped in a tag that decoders can use to see/check if the binary data is CBOR. + // We can ignore it here. 
+ if num == TAG_SELF_DESCRIBED_CBOR { + return + } + + t := Tag{ + number = num, + value = decode(r) or_return, + } + + if nested, ok := t.value.(^Tag); ok { + destroy(nested) + return nil, .Nested_Tag + } + + return t, nil +} + +_decode_tag_nr :: proc(r: io.Reader, add: Add) -> (nr: Tag_Number, err: Decode_Error) { + #partial switch add { + case .One_Byte: return u64(_decode_u8(r) or_return), nil + case .Two_Bytes: return u64(_decode_u16(r) or_return), nil + case .Four_Bytes: return u64(_decode_u32(r) or_return), nil + case .Eight_Bytes: return u64(_decode_u64(r) or_return), nil + case: return u64(_decode_tiny_u8(add) or_return), nil + } +} + +_encode_tag :: proc(e: Encoder, val: Tag) -> Encode_Error { + _encode_u64(e, val.number, .Tag) or_return + return encode(e, val.value) +} + +_decode_simple :: proc(r: io.Reader) -> (v: Simple, err: io.Error) { + buf: [1]byte + io.read_full(r, buf[:]) or_return + return Simple(buf[0]), nil +} + +_encode_simple :: proc(w: io.Writer, v: Simple) -> (err: Encode_Error) { + header := u8(Major.Other) << 5 + + if v < Simple(Add.False) { + header |= u8(v) + _, err = io.write_full(w, {header}) + return + } else if v <= Simple(Add.Break) { + return .Invalid_Simple + } + + header |= u8(Add.One_Byte) + _, err = io.write_full(w, {header, u8(v)}) + return +} + +_decode_tiny_simple :: proc(add: Add) -> (Simple, Decode_Data_Error) { + if add < Add.False { + return Simple(add), nil + } + + return 0, .Bad_Argument +} + +_decode_f16 :: proc(r: io.Reader) -> (v: f16, err: io.Error) { + bytes: [2]byte + io.read_full(r, bytes[:]) or_return + n := endian.unchecked_get_u16be(bytes[:]) + return transmute(f16)n, nil +} + +_encode_f16 :: proc(w: io.Writer, v: f16) -> (err: io.Error) { + bytes: [3]byte + bytes[0] = u8(Header.F16) + endian.unchecked_put_u16be(bytes[1:], transmute(u16)v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_f32 :: proc(r: io.Reader) -> (v: f32, err: io.Error) { + bytes: [4]byte + io.read_full(r, bytes[:]) or_return 
+ n := endian.unchecked_get_u32be(bytes[:]) + return transmute(f32)n, nil +} + +_encode_f32 :: proc(e: Encoder, v: f32) -> io.Error { + if .Deterministic_Float_Size in e.flags { + return _encode_deterministic_float(e.writer, v) + } + return _encode_f32_exact(e.writer, v) +} + +_encode_f32_exact :: proc(w: io.Writer, v: f32) -> (err: io.Error) { + bytes: [5]byte + bytes[0] = u8(Header.F32) + endian.unchecked_put_u32be(bytes[1:], transmute(u32)v) + _, err = io.write_full(w, bytes[:]) + return +} + +_decode_f64 :: proc(r: io.Reader) -> (v: f64, err: io.Error) { + bytes: [8]byte + io.read_full(r, bytes[:]) or_return + n := endian.unchecked_get_u64be(bytes[:]) + return transmute(f64)n, nil +} + +_encode_f64 :: proc(e: Encoder, v: f64) -> io.Error { + if .Deterministic_Float_Size in e.flags { + return _encode_deterministic_float(e.writer, v) + } + return _encode_f64_exact(e.writer, v) +} + +_encode_f64_exact :: proc(w: io.Writer, v: f64) -> (err: io.Error) { + bytes: [9]byte + bytes[0] = u8(Header.F64) + endian.unchecked_put_u64be(bytes[1:], transmute(u64)v) + _, err = io.write_full(w, bytes[:]) + return +} + +_encode_bool :: proc(w: io.Writer, v: bool) -> (err: io.Error) { + switch v { + case true: _, err = io.write_full(w, {u8(Header.True )}); return + case false: _, err = io.write_full(w, {u8(Header.False)}); return + case: unreachable() + } +} + +_encode_undefined :: proc(w: io.Writer) -> io.Error { + _, err := io.write_full(w, {u8(Header.Undefined)}) + return err +} + +_encode_nil :: proc(w: io.Writer) -> io.Error { + _, err := io.write_full(w, {u8(Header.Nil)}) + return err +} + +// Streaming + +encode_stream_begin :: proc(w: io.Writer, major: Major) -> (err: io.Error) { + assert(major >= Major(.Bytes) && major <= Major(.Map), "illegal stream type") + + header := (u8(major) << 5) | u8(Add.Length_Unknown) + _, err = io.write_full(w, {header}) + return +} + +encode_stream_end :: proc(w: io.Writer) -> io.Error { + header := (u8(Major.Other) << 5) | u8(Add.Break) + _, 
err := io.write_full(w, {header}) + return err +} + +encode_stream_bytes :: _encode_bytes +encode_stream_text :: _encode_text +encode_stream_array_item :: encode + +encode_stream_map_entry :: proc(e: Encoder, key: Value, val: Value) -> Encode_Error { + encode(e, key) or_return + return encode(e, val) +} + +// + +_decode_container_length :: proc(r: io.Reader, add: Add) -> (length: Maybe(int), is_unknown: bool, err: Decode_Error) { + if add == Add.Length_Unknown { return nil, true, nil } + #partial switch add { + case .One_Byte: length = int(_decode_u8(r) or_return) + case .Two_Bytes: length = int(_decode_u16(r) or_return) + case .Four_Bytes: + big_length := _decode_u32(r) or_return + if u64(big_length) > u64(max(int)) { + err = .Length_Too_Big + return + } + length = int(big_length) + case .Eight_Bytes: + big_length := _decode_u64(r) or_return + if big_length > u64(max(int)) { + err = .Length_Too_Big + return + } + length = int(big_length) + case: + length = int(_decode_tiny_u8(add) or_return) + } + return +} + +// Deterministic encoding is (among other things) encoding all values into their smallest +// possible representation. +// See section 4 of RFC 8949. 
+
+_encode_deterministic_uint :: proc {
+	_encode_u8,
+	_encode_deterministic_u16,
+	_encode_deterministic_u32,
+	_encode_deterministic_u64,
+	_encode_deterministic_u128,
+}
+
+// Encodes `v` through `_encode_u8` when it fits in a u8, otherwise as an exact u16.
+_encode_deterministic_u16 :: proc(w: io.Writer, v: u16, major: Major = .Unsigned) -> Encode_Error {
+	if v <= u16(max(u8)) {
+		return _encode_u8(w, u8(v), major)
+	}
+	return _encode_u16_exact(w, v, major)
+}
+
+// Encodes `v` with the first encoder, going from u8 up to u32, whose type can hold the value.
+_encode_deterministic_u32 :: proc(w: io.Writer, v: u32, major: Major = .Unsigned) -> Encode_Error {
+	if v <= u32(max(u8)) {
+		return _encode_u8(w, u8(v), major)
+	}
+	if v <= u32(max(u16)) {
+		return _encode_u16_exact(w, u16(v), major)
+	}
+	return _encode_u32_exact(w, v, major)
+}
+
+// Encodes `v` with the first encoder, going from u8 up to u64, whose type can hold the value.
+_encode_deterministic_u64 :: proc(w: io.Writer, v: u64, major: Major = .Unsigned) -> Encode_Error {
+	if v <= u64(max(u8)) {
+		return _encode_u8(w, u8(v), major)
+	}
+	if v <= u64(max(u16)) {
+		return _encode_u16_exact(w, u16(v), major)
+	}
+	if v <= u64(max(u32)) {
+		return _encode_u32_exact(w, u32(v), major)
+	}
+	return _encode_u64_exact(w, v, major)
+}
+
+// Encodes `v` with the first encoder, going from u8 up to u64, whose type can hold the value.
+// Values that do not fit in a u64 are rejected with `.Int_Too_Big`.
+_encode_deterministic_u128 :: proc(w: io.Writer, v: u128, major: Major = .Unsigned) -> Encode_Error {
+	if v <= u128(max(u8)) {
+		return _encode_u8(w, u8(v), major)
+	}
+	if v <= u128(max(u16)) {
+		return _encode_u16_exact(w, u16(v), major)
+	}
+	if v <= u128(max(u32)) {
+		return _encode_u32_exact(w, u32(v), major)
+	}
+	if v <= u128(max(u64)) {
+		return _encode_u64_exact(w, u64(v), major)
+	}
+	return .Int_Too_Big
+}
+
+// Forwards one of the `Negative_*` types to the deterministic uint encoders under the
+// `.Negative` major type.
+_encode_deterministic_negative :: #force_inline proc(w: io.Writer, v: $T) -> Encode_Error
+	where T == Negative_U8 || T == Negative_U16 || T == Negative_U32 || T == Negative_U64 {
+	return _encode_deterministic_uint(w, v, .Negative)
+}
+
+// A Deterministic float is a float in the smallest type that stays the same after down casting.
+_encode_deterministic_float :: proc { + _encode_f16, + _encode_deterministic_f32, + _encode_deterministic_f64, +} + +_encode_deterministic_f32 :: proc(w: io.Writer, v: f32) -> io.Error { + if (f32(f16(v)) == v) { + return _encode_f16(w, f16(v)) + } + + return _encode_f32_exact(w, v) +} + +_encode_deterministic_f64 :: proc(w: io.Writer, v: f64) -> io.Error { + if (f64(f16(v)) == v) { + return _encode_f16(w, f16(v)) + } + + if (f64(f32(v)) == v) { + return _encode_f32_exact(w, f32(v)) + } + + return _encode_f64_exact(w, v) +} diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin new file mode 100644 index 000000000..aab2defb2 --- /dev/null +++ b/core/encoding/cbor/marshal.odin @@ -0,0 +1,541 @@ +package cbor + +import "core:bytes" +import "core:intrinsics" +import "core:io" +import "core:mem" +import "core:reflect" +import "core:runtime" +import "core:slice" +import "core:strconv" +import "core:strings" +import "core:unicode/utf8" + +/* +Marshal a value into binary CBOR. + +Flags can be used to control the output (mainly determinism, which coincidently affects size). + +The default flags `ENCODE_SMALL` (`.Deterministic_Int_Size`, `.Deterministic_Float_Size`) will try +to put ints and floats into their smallest possible byte size without losing equality. + +Adding the `.Self_Described_CBOR` flag will wrap the value in a tag that lets generic decoders know +the contents are CBOR from just reading the first byte. + +Adding the `.Deterministic_Map_Sorting` flag will sort the encoded maps by the byte content of the +encoded key. This flag has a cost on performance and memory efficiency because all keys in a map +have to be precomputed, sorted and only then written to the output. + +Empty flags will do nothing extra to the value. + +The allocations for the `.Deterministic_Map_Sorting` flag are done using the `context.temp_allocator` +but are followed by the necessary `delete` and `free` calls if the allocator supports them. 
+This is helpful when the CBOR size is so big that you don't want to collect all the temporary +allocations until the end. +*/ +marshal_into :: proc { + marshal_into_bytes, + marshal_into_builder, + marshal_into_writer, + marshal_into_encoder, +} + +marshal :: marshal_into + +// Marshals the given value into a CBOR byte stream (allocated using the given allocator). +// See docs on the `marshal_into` proc group for more info. +marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.allocator) -> (bytes: []byte, err: Marshal_Error) { + b, alloc_err := strings.builder_make(allocator) + // The builder as a stream also returns .EOF if it ran out of memory so this is consistent. + if alloc_err != nil { + return nil, .EOF + } + + defer if err != nil { strings.builder_destroy(&b) } + + if err = marshal_into_builder(&b, v, flags); err != nil { + return + } + + return b.buf[:], nil +} + +// Marshals the given value into a CBOR byte stream written to the given builder. +// See docs on the `marshal_into` proc group for more info. +marshal_into_builder :: proc(b: ^strings.Builder, v: any, flags := ENCODE_SMALL) -> Marshal_Error { + return marshal_into_writer(strings.to_writer(b), v, flags) +} + +// Marshals the given value into a CBOR byte stream written to the given writer. +// See docs on the `marshal_into` proc group for more info. +marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Marshal_Error { + encoder := Encoder{flags, w} + return marshal_into_encoder(encoder, v) +} + +// Marshals the given value into a CBOR byte stream written to the given encoder. +// See docs on the `marshal_into` proc group for more info. +marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { + e := e + + init: bool + defer if init { + e.flags &~= {._In_Progress} + } + + // If not in progress we do initialization and set in progress. 
+ if ._In_Progress not_in e.flags { + init = true + e.flags |= {._In_Progress} + + if .Self_Described_CBOR in e.flags { + err_conv(_encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag)) or_return + } + } + + if v == nil { + return _encode_nil(e.writer) + } + + // Check if type has a tag implementation to use. + if impl, ok := _tag_implementations_type[v.id]; ok { + return impl->marshal(e, v) + } + + ti := runtime.type_info_base(type_info_of(v.id)) + a := any{v.data, ti.id} + + #partial switch info in ti.variant { + case runtime.Type_Info_Named: + unreachable() + + case runtime.Type_Info_Pointer: + switch vv in v { + case Undefined: return _encode_undefined(e.writer) + case Nil: return _encode_nil(e.writer) + } + + case runtime.Type_Info_Integer: + switch vv in v { + case Simple: return err_conv(_encode_simple(e.writer, vv)) + case Negative_U8: return _encode_u8(e.writer, u8(vv), .Negative) + case Negative_U16: return err_conv(_encode_u16(e, u16(vv), .Negative)) + case Negative_U32: return err_conv(_encode_u32(e, u32(vv), .Negative)) + case Negative_U64: return err_conv(_encode_u64(e, u64(vv), .Negative)) + } + + switch i in a { + case i8: return _encode_uint(e.writer, _int_to_uint(i)) + case i16: return err_conv(_encode_uint(e, _int_to_uint(i))) + case i32: return err_conv(_encode_uint(e, _int_to_uint(i))) + case i64: return err_conv(_encode_uint(e, _int_to_uint(i))) + case i128: return err_conv(_encode_uint(e, _int_to_uint(i128(i)) or_return)) + case int: return err_conv(_encode_uint(e, _int_to_uint(i64(i)))) + + case u8: return _encode_uint(e.writer, i) + case u16: return err_conv(_encode_uint(e, i)) + case u32: return err_conv(_encode_uint(e, i)) + case u64: return err_conv(_encode_uint(e, i)) + case u128: return err_conv(_encode_uint(e, _u128_to_u64(u128(i)) or_return)) + case uint: return err_conv(_encode_uint(e, u64(i))) + case uintptr: return err_conv(_encode_uint(e, u64(i))) + + case i16le: return err_conv(_encode_uint(e, _int_to_uint(i16(i)))) + case i32le: return 
err_conv(_encode_uint(e, _int_to_uint(i32(i)))) + case i64le: return err_conv(_encode_uint(e, _int_to_uint(i64(i)))) + case i128le: return err_conv(_encode_uint(e, _int_to_uint(i128(i)) or_return)) + + case u16le: return err_conv(_encode_uint(e, u16(i))) + case u32le: return err_conv(_encode_uint(e, u32(i))) + case u64le: return err_conv(_encode_uint(e, u64(i))) + case u128le: return err_conv(_encode_uint(e, _u128_to_u64(u128(i)) or_return)) + + case i16be: return err_conv(_encode_uint(e, _int_to_uint(i16(i)))) + case i32be: return err_conv(_encode_uint(e, _int_to_uint(i32(i)))) + case i64be: return err_conv(_encode_uint(e, _int_to_uint(i64(i)))) + case i128be: return err_conv(_encode_uint(e, _int_to_uint(i128(i)) or_return)) + + case u16be: return err_conv(_encode_uint(e, u16(i))) + case u32be: return err_conv(_encode_uint(e, u32(i))) + case u64be: return err_conv(_encode_uint(e, u64(i))) + case u128be: return err_conv(_encode_uint(e, _u128_to_u64(u128(i)) or_return)) + } + + case runtime.Type_Info_Rune: + buf, w := utf8.encode_rune(a.(rune)) + return err_conv(_encode_text(e, string(buf[:w]))) + + case runtime.Type_Info_Float: + switch f in a { + case f16: return _encode_f16(e.writer, f) + case f32: return _encode_f32(e, f) + case f64: return _encode_f64(e, f) + + case f16le: return _encode_f16(e.writer, f16(f)) + case f32le: return _encode_f32(e, f32(f)) + case f64le: return _encode_f64(e, f64(f)) + + case f16be: return _encode_f16(e.writer, f16(f)) + case f32be: return _encode_f32(e, f32(f)) + case f64be: return _encode_f64(e, f64(f)) + } + + case runtime.Type_Info_Complex: + switch z in a { + case complex32: + arr: [2]Value = {real(z), imag(z)} + return err_conv(_encode_array(e, arr[:])) + case complex64: + arr: [2]Value = {real(z), imag(z)} + return err_conv(_encode_array(e, arr[:])) + case complex128: + arr: [2]Value = {real(z), imag(z)} + return err_conv(_encode_array(e, arr[:])) + } + + case runtime.Type_Info_Quaternion: + switch q in a { + case 
quaternion64: + arr: [4]Value = {imag(q), jmag(q), kmag(q), real(q)} + return err_conv(_encode_array(e, arr[:])) + case quaternion128: + arr: [4]Value = {imag(q), jmag(q), kmag(q), real(q)} + return err_conv(_encode_array(e, arr[:])) + case quaternion256: + arr: [4]Value = {imag(q), jmag(q), kmag(q), real(q)} + return err_conv(_encode_array(e, arr[:])) + } + + case runtime.Type_Info_String: + switch s in a { + case string: return err_conv(_encode_text(e, s)) + case cstring: return err_conv(_encode_text(e, string(s))) + } + + case runtime.Type_Info_Boolean: + val: bool + switch b in a { + case bool: return _encode_bool(e.writer, b) + case b8: return _encode_bool(e.writer, bool(b)) + case b16: return _encode_bool(e.writer, bool(b)) + case b32: return _encode_bool(e.writer, bool(b)) + case b64: return _encode_bool(e.writer, bool(b)) + } + + case runtime.Type_Info_Array: + if info.elem.id == byte { + raw := ([^]byte)(v.data) + return err_conv(_encode_bytes(e, raw[:info.count])) + } + + err_conv(_encode_u64(e, u64(info.count), .Array)) or_return + for i in 0.. (res: [10]byte) { + e := e + builder := strings.builder_from_slice(res[:]) + e.writer = strings.to_stream(&builder) + + assert(_encode_u64(e, u64(len(str)), .Text) == nil) + res[9] = u8(len(builder.buf)) + assert(res[9] < 10) + return + } + + Encoded_Entry_Fast :: struct($T: typeid) { + pre_key: [10]byte, + key: T, + val_idx: uintptr, + } + + Encoded_Entry :: struct { + key: ^[dynamic]byte, + val_idx: uintptr, + } + + switch info.key.id { + case string: + entries := make([dynamic]Encoded_Entry_Fast(^[]byte), 0, map_cap, context.temp_allocator) or_return + defer delete(entries) + + for bucket_index in 0.. 
slice.Ordering { + a, b := a, b + pre_cmp := slice.Ordering(bytes.compare(a.pre_key[:a.pre_key[9]], b.pre_key[:b.pre_key[9]])) + if pre_cmp != .Equal { + return pre_cmp + } + + return slice.Ordering(bytes.compare(a.key^, b.key^)) + }) + + for &entry in entries { + io.write_full(e.writer, entry.pre_key[:entry.pre_key[9]]) or_return + io.write_full(e.writer, entry.key^) or_return + + value := rawptr(runtime.map_cell_index_dynamic(vs, info.map_info.vs, entry.val_idx)) + marshal_into(e, any{ value, info.value.id }) or_return + } + return + + case cstring: + entries := make([dynamic]Encoded_Entry_Fast(^cstring), 0, map_cap, context.temp_allocator) or_return + defer delete(entries) + + for bucket_index in 0.. slice.Ordering { + a, b := a, b + pre_cmp := slice.Ordering(bytes.compare(a.pre_key[:a.pre_key[9]], b.pre_key[:b.pre_key[9]])) + if pre_cmp != .Equal { + return pre_cmp + } + + ab := transmute([]byte)string(a.key^) + bb := transmute([]byte)string(b.key^) + return slice.Ordering(bytes.compare(ab, bb)) + }) + + for &entry in entries { + io.write_full(e.writer, entry.pre_key[:entry.pre_key[9]]) or_return + io.write_full(e.writer, transmute([]byte)string(entry.key^)) or_return + + value := rawptr(runtime.map_cell_index_dynamic(vs, info.map_info.vs, entry.val_idx)) + marshal_into(e, any{ value, info.value.id }) or_return + } + return + + case: + entries := make([dynamic]Encoded_Entry, 0, map_cap, context.temp_allocator) or_return + defer delete(entries) + + for bucket_index in 0.. 
slice.Ordering { + return slice.Ordering(bytes.compare(a.key[:], b.key[:])) + }) + + for entry in entries { + io.write_full(e.writer, entry.key[:]) or_return + delete(entry.key^) + + value := rawptr(runtime.map_cell_index_dynamic(vs, info.map_info.vs, entry.val_idx)) + marshal_into(e, any{ value, info.value.id }) or_return + } + return + } + } + + case runtime.Type_Info_Struct: + switch vv in v { + case Tag: return err_conv(_encode_tag(e, vv)) + } + + err_conv(_encode_u16(e, u16(len(info.names)), .Map)) or_return + + marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, name: string, i: int) -> Marshal_Error { + err_conv(_encode_text(e, name)) or_return + + id := info.types[i].id + data := rawptr(uintptr(v.data) + info.offsets[i]) + field_any := any{data, id} + + if tag := string(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "cbor_tag")); tag != "" { + if impl, ok := _tag_implementations_id[tag]; ok { + return impl->marshal(e, field_any) + } + + nr, ok := strconv.parse_u64_of_base(tag, 10) + if !ok { return .Invalid_CBOR_Tag } + + if impl, nok := _tag_implementations_nr[nr]; nok { + return impl->marshal(e, field_any) + } + + err_conv(_encode_u64(e, nr, .Tag)) or_return + } + + return marshal_into(e, field_any) + } + + field_name :: #force_inline proc(info: runtime.Type_Info_Struct, i: int) -> string { + if cbor_name := string(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "cbor")); cbor_name != "" { + return cbor_name + } else { + return info.names[i] + } + } + + if .Deterministic_Map_Sorting in e.flags { + Name :: struct { + name: string, + field: int, + } + entries := make([dynamic]Name, 0, len(info.names), context.temp_allocator) or_return + defer delete(entries) + + for name, i in info.names { + append(&entries, Name{field_name(info, i), i}) or_return + } + + // Sort lexicographic on the bytes of the key. 
+ slice.sort_by_cmp(entries[:], proc(a, b: Name) -> slice.Ordering { + return slice.Ordering(bytes.compare(transmute([]byte)a.name, transmute([]byte)b.name)) + }) + + for entry in entries { + marshal_entry(e, info, v, entry.name, entry.field) or_return + } + } else { + for name, i in info.names { + marshal_entry(e, info, v, field_name(info, i), i) or_return + } + } + return + + case runtime.Type_Info_Union: + switch vv in v { + case Value: return err_conv(encode(e, vv)) + } + + tag := reflect.get_union_variant_raw_tag(v) + if v.data == nil || tag <= 0 { + return _encode_nil(e.writer) + } + id := info.variants[tag-1].id + return marshal_into(e, any{v.data, id}) + + case runtime.Type_Info_Enum: + return marshal_into(e, any{v.data, info.base.id}) + + case runtime.Type_Info_Bit_Set: + do_byte_swap := is_bit_set_different_endian_to_platform(info.underlying) + switch ti.size * 8 { + case 0: + return _encode_u8(e.writer, 0) + case 8: + x := (^u8)(v.data)^ + return _encode_u8(e.writer, x) + case 16: + x := (^u16)(v.data)^ + if do_byte_swap { x = intrinsics.byte_swap(x) } + return err_conv(_encode_u16(e, x)) + case 32: + x := (^u32)(v.data)^ + if do_byte_swap { x = intrinsics.byte_swap(x) } + return err_conv(_encode_u32(e, x)) + case 64: + x := (^u64)(v.data)^ + if do_byte_swap { x = intrinsics.byte_swap(x) } + return err_conv(_encode_u64(e, x)) + case: + panic("unknown bit_size size") + } + } + + return _unsupported(v.id, nil) +} diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin new file mode 100644 index 000000000..54bc7dd15 --- /dev/null +++ b/core/encoding/cbor/tags.odin @@ -0,0 +1,361 @@ +package cbor + +import "core:encoding/base64" +import "core:io" +import "core:math" +import "core:math/big" +import "core:mem" +import "core:reflect" +import "core:runtime" +import "core:strings" +import "core:time" + +// Tags defined in RFC 7049 that we provide implementations for. 
+
+// UTC time in seconds, unmarshalled into a `core:time` `time.Time` or integer.
+TAG_EPOCH_TIME_NR :: 1
+TAG_EPOCH_TIME_ID :: "epoch"
+
+// Using `core:math/big`, big integers are properly encoded and decoded during marshal and unmarshal.
+TAG_UNSIGNED_BIG_NR :: 2
+// Using `core:math/big`, big integers are properly encoded and decoded during marshal and unmarshal.
+TAG_NEGATIVE_BIG_NR :: 3
+
+// TAG_DECIMAL_FRACTION :: 4 // NOTE: We could probably implement this with `math/fixed`.
+
+// Sometimes it is beneficial to carry an embedded CBOR data item that is not meant to be decoded
+// immediately at the time the enclosing data item is being decoded. Tag number 24 (CBOR data item)
+// can be used to tag the embedded byte string as a single data item encoded in CBOR format.
+TAG_CBOR_NR :: 24
+TAG_CBOR_ID :: "cbor"
+
+// The contents of this tag are base64 encoded during marshal and decoded during unmarshal.
+TAG_BASE64_NR :: 34
+TAG_BASE64_ID :: "base64"
+
+// A tag that is used to detect the contents of a binary buffer (like a file) are CBOR.
+// This tag would wrap everything else, decoders can then check for this header and see if the
+// given content is definitely CBOR.
+TAG_SELF_DESCRIBED_CBOR :: 55799
+
+// A tag implementation that handles marshals and unmarshals for the tag it is registered on.
+Tag_Implementation :: struct {
+	// Opaque pointer handed back to the procedures through `self`.
+	// NOTE(review): the implementations registered by this package all pass `nil` here,
+	// presumably this is meant for state of user implementations, confirm.
+	data:      rawptr,
+	// Called to unmarshal the tag.
+	unmarshal: Tag_Unmarshal_Proc,
+	// Called to marshal a value.
+	marshal:   Tag_Marshal_Proc,
+}
+
+// Procedure responsible for unmarshalling the tag out of the reader into the given `any`.
+Tag_Unmarshal_Proc :: #type proc(self: ^Tag_Implementation, r: io.Reader, tag_nr: Tag_Number, v: any) -> Unmarshal_Error
+
+// Procedure responsible for marshalling the tag in the given `any` into the given encoder.
+Tag_Marshal_Proc :: #type proc(self: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error
+
+// When encountering a tag in the CBOR being unmarshalled, the implementation is used to unmarshal it.
+// When encountering a struct tag like `cbor_tag:"Tag_Number"`, the implementation is used to marshal it.
+_tag_implementations_nr: map[Tag_Number]Tag_Implementation
+
+// Same as the number implementations but friendlier to use as a struct tag.
+// Instead of `cbor_tag:"34"` you can use `cbor_tag:"base64"`.
+_tag_implementations_id: map[string]Tag_Implementation
+
+// Tag implementations that are always used by a type, if that type is encountered in marshal it
+// will rely on the implementation to marshal it.
+//
+// This is good for types that don't make sense or can't marshal in its default form.
+_tag_implementations_type: map[typeid]Tag_Implementation
+
+// Register a custom tag implementation to be used when marshalling that type and unmarshalling that tag number.
+//
+// The implementation is stored under both `nr` and `type`, replacing whatever was registered there before.
+tag_register_type :: proc(impl: Tag_Implementation, nr: Tag_Number, type: typeid) {
+	_tag_implementations_nr[nr] = impl
+	_tag_implementations_type[type] = impl
+}
+
+// Register a custom tag implementation to be used when unmarshalling that tag number or marshalling
+// a field with the struct tag `cbor_tag:"nr"` or `cbor_tag:"id"`.
+//
+// The implementation is stored under both `nr` and `id`, replacing whatever was registered there before.
+tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string) {
+	_tag_implementations_nr[nr] = impl
+	_tag_implementations_id[id] = impl
+}
+
+// Controls initialization of default tag implementations.
+// JS and WASI default to a panic allocator so we don't want to do it on those.
+INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, ODIN_OS != .JS && ODIN_OS != .WASI)
+
+// Registers the default tags at program initialization, disabled when `INITIALIZE_DEFAULT_TAGS` is false.
+@(private, init, disabled=!INITIALIZE_DEFAULT_TAGS)
+tags_initialize_defaults :: proc() {
+	tags_register_defaults()
+}
+
+// Registers tags that have implementations provided by this package.
+// This is done by default and can be controlled with the `CBOR_INITIALIZE_DEFAULT_TAGS` define.
+tags_register_defaults :: proc() {
+	// Tags a user opts in to with a struct tag, by number or by name.
+
+	// NOTE: Not registering this the other way around, user can opt-in using the `cbor_tag:"1"` struct
+	// tag instead, it would lose precision and marshalling the `time.Time` struct normally is valid.
+	epoch_impl := Tag_Implementation{nil, tag_time_unmarshal, tag_time_marshal}
+	tag_register_number(epoch_impl, TAG_EPOCH_TIME_NR, TAG_EPOCH_TIME_ID)
+
+	// Use the struct tag `cbor_tag:"34"` to have your field encoded in a base64.
+	base64_impl := Tag_Implementation{nil, tag_base64_unmarshal, tag_base64_marshal}
+	tag_register_number(base64_impl, TAG_BASE64_NR, TAG_BASE64_ID)
+
+	// Use the struct tag `cbor_tag:"24"` to keep a non-decoded field of raw CBOR.
+	cbor_impl := Tag_Implementation{nil, tag_cbor_unmarshal, tag_cbor_marshal}
+	tag_register_number(cbor_impl, TAG_CBOR_NR, TAG_CBOR_ID)
+
+	// These following tags are registered at the type level and don't require an opt-in struct tag.
+	// Encoding these types on its own make no sense or no data is lost to encode it.
+
+	// One implementation serves both big number tags, `big.Int` is registered for each of them.
+	big_impl := Tag_Implementation{nil, tag_big_unmarshal, tag_big_marshal}
+	tag_register_type(big_impl, TAG_UNSIGNED_BIG_NR, big.Int)
+	tag_register_type(big_impl, TAG_NEGATIVE_BIG_NR, big.Int)
+}
+
+// Tag number 1 contains a numerical value counting the number of seconds from 1970-01-01T00:00Z
+// in UTC time to the represented point in civil time.
+//
+// See RFC 8949 section 3.4.2.
+@(private)
+tag_time_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) {
+	hdr := _decode_header(r) or_return
+	#partial switch hdr {
+	// Integers with an explicit size: whole seconds.
+	case .U8, .U16, .U32, .U64, .Neg_U8, .Neg_U16, .Neg_U32, .Neg_U64:
+		switch &dst in v {
+		case time.Time:
+			i: i64
+			_unmarshal_any_ptr(r, &i, hdr) or_return
+			dst = time.unix(i64(i), 0)
+			return
+		case:
+			return _unmarshal_value(r, v, hdr)
+		}
+
+	// Floats: the fractional part is converted to nanoseconds.
+	case .F16, .F32, .F64:
+		switch &dst in v {
+		case time.Time:
+			f: f64
+			_unmarshal_any_ptr(r, &f, hdr) or_return
+			whole, fract := math.modf(f)
+			dst = time.unix(i64(whole), i64(fract * 1e9))
+			return
+		case:
+			return _unmarshal_value(r, v, hdr)
+		}
+
+	case:
+		// Small unsigned integers are stored in the additional information of the header itself
+		// and do not match any of the sized headers above. They have the `.Unsigned` major type,
+		// `.Other` with a small argument would be a simple value, which is not a valid time.
+		maj, add := _header_split(hdr)
+		if maj == .Unsigned {
+			i := _decode_tiny_u8(add) or_return
+
+			switch &dst in v {
+			case time.Time:
+				dst = time.unix(i64(i), 0)
+				// Decoded successfully, don't fall through to the error below.
+				return
+			case:
+				if _assign_int(v, i) { return }
+			}
+		}
+
+		// Only numbers and floats are allowed in this tag.
+		return .Bad_Tag_Value
+	}
+
+	return _unsupported(v, hdr)
+}
+
+// Marshals a `time.Time` as tag 1 followed by its unix time in whole seconds.
+// Only registered for `time.Time` fields that opted in, any other type is unreachable.
+@(private)
+tag_time_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error {
+	switch vv in v {
+	case time.Time:
+		// NOTE: we lose precision here, which is one of the reasons for this tag being opt-in.
+		i := time.time_to_unix(vv)
+
+		_encode_u8(e.writer, TAG_EPOCH_TIME_NR, .Tag) or_return
+		return err_conv(_encode_uint(e, _int_to_uint(i)))
+	case:
+		unreachable()
+	}
+}
+
+@(private)
+tag_big_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, tnr: Tag_Number, v: any) -> (err: Unmarshal_Error) {
+	hdr := _decode_header(r) or_return
+	maj, add := _header_split(hdr)
+	if maj != .Bytes {
+		// Only bytes are supported in this tag.
+ return .Bad_Tag_Value + } + + switch &dst in v { + case big.Int: + bytes := err_conv(_decode_bytes(r, add)) or_return + defer delete(bytes) + + if err := big.int_from_bytes_big(&dst, bytes); err != nil { + return .Bad_Tag_Value + } + + if tnr == TAG_NEGATIVE_BIG_NR { + dst.sign = .Negative + } + + return + } + + return _unsupported(v, hdr) +} + +@(private) +tag_big_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error { + switch &vv in v { + case big.Int: + if !big.int_is_initialized(&vv) { + _encode_u8(e.writer, TAG_UNSIGNED_BIG_NR, .Tag) or_return + return _encode_u8(e.writer, 0, .Bytes) + } + + // NOTE: using the panic_allocator because all procedures should only allocate if the Int + // is uninitialized (which we checked). + + is_neg, err := big.is_negative(&vv, mem.panic_allocator()) + assert(err == nil, "only errors if not initialized, which has been checked") + + tnr: u8 = TAG_NEGATIVE_BIG_NR if is_neg else TAG_UNSIGNED_BIG_NR + _encode_u8(e.writer, tnr, .Tag) or_return + + size_in_bytes, berr := big.int_to_bytes_size(&vv, false, mem.panic_allocator()) + assert(berr == nil, "only errors if not initialized, which has been checked") + assert(size_in_bytes >= 0) + + err_conv(_encode_u64(e, u64(size_in_bytes), .Bytes)) or_return + + for offset := (size_in_bytes*8)-8; offset >= 0; offset -= 8 { + bits, derr := big.int_bitfield_extract(&vv, offset, 8, mem.panic_allocator()) + assert(derr == nil, "only errors if not initialized or invalid argument (offset and count), which won't happen") + + io.write_full(e.writer, {u8(bits & 255)}) or_return + } + return nil + + case: unreachable() + } +} + +@(private) +tag_cbor_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> Unmarshal_Error { + hdr := _decode_header(r) or_return + major, add := _header_split(hdr) + #partial switch major { + case .Bytes: + ti := reflect.type_info_base(type_info_of(v.id)) + return _unmarshal_bytes(r, v, ti, hdr, add) + + case: return 
.Bad_Tag_Value + } +} + +@(private) +tag_cbor_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error { + _encode_u8(e.writer, TAG_CBOR_NR, .Tag) or_return + ti := runtime.type_info_base(type_info_of(v.id)) + #partial switch t in ti.variant { + case runtime.Type_Info_String: + return marshal_into(e, v) + case runtime.Type_Info_Array: + elem_base := reflect.type_info_base(t.elem) + if elem_base.id != byte { return .Bad_Tag_Value } + return marshal_into(e, v) + case runtime.Type_Info_Slice: + elem_base := reflect.type_info_base(t.elem) + if elem_base.id != byte { return .Bad_Tag_Value } + return marshal_into(e, v) + case runtime.Type_Info_Dynamic_Array: + elem_base := reflect.type_info_base(t.elem) + if elem_base.id != byte { return .Bad_Tag_Value } + return marshal_into(e, v) + case: + return .Bad_Tag_Value + } +} + +// NOTE: this could probably be more efficient by decoding bytes from CBOR and then from base64 at the same time. +@(private) +tag_base64_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { + hdr := _decode_header(r) or_return + major, add := _header_split(hdr) + #partial switch major { + case .Text: + ti := reflect.type_info_base(type_info_of(v.id)) + _unmarshal_bytes(r, v, ti, hdr, add) or_return + #partial switch t in ti.variant { + case runtime.Type_Info_String: + switch t.is_cstring { + case true: + str := string((^cstring)(v.data)^) + decoded := base64.decode(str) or_return + (^cstring)(v.data)^ = strings.clone_to_cstring(string(decoded)) or_return + delete(decoded) + delete(str) + case false: + str := (^string)(v.data)^ + decoded := base64.decode(str) or_return + (^string)(v.data)^ = string(decoded) + delete(str) + } + return + + case runtime.Type_Info_Array: + raw := ([^]byte)(v.data) + decoded := base64.decode(string(raw[:t.count])) or_return + copy(raw[:t.count], decoded) + delete(decoded) + return + + case runtime.Type_Info_Slice: + raw := (^[]byte)(v.data) + decoded 
:= base64.decode(string(raw^)) or_return + delete(raw^) + raw^ = decoded + return + + case runtime.Type_Info_Dynamic_Array: + raw := (^mem.Raw_Dynamic_Array)(v.data) + str := string(((^[dynamic]byte)(v.data)^)[:]) + + decoded := base64.decode(str) or_return + delete(str) + + raw.data = raw_data(decoded) + raw.len = len(decoded) + raw.cap = len(decoded) + return + + case: unreachable() + } + + case: return .Bad_Tag_Value + } +} + +@(private) +tag_base64_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error { + _encode_u8(e.writer, TAG_BASE64_NR, .Tag) or_return + + ti := runtime.type_info_base(type_info_of(v.id)) + a := any{v.data, ti.id} + + bytes: []byte + switch val in a { + case string: bytes = transmute([]byte)val + case cstring: bytes = transmute([]byte)string(val) + case []byte: bytes = val + case [dynamic]byte: bytes = val[:] + case: + #partial switch t in ti.variant { + case runtime.Type_Info_Array: + if t.elem.id != byte { return .Bad_Tag_Value } + bytes = ([^]byte)(v.data)[:t.count] + case: + return .Bad_Tag_Value + } + } + + out_len := base64.encoded_length(bytes) + err_conv(_encode_u64(e, u64(out_len), .Text)) or_return + return base64.encode_into(e.writer, bytes) +} diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin new file mode 100644 index 000000000..0da8e3f2a --- /dev/null +++ b/core/encoding/cbor/unmarshal.odin @@ -0,0 +1,832 @@ +package cbor + +import "core:intrinsics" +import "core:io" +import "core:mem" +import "core:reflect" +import "core:runtime" +import "core:strings" +import "core:unicode/utf8" + +// `strings` is only used in poly procs, but -vet thinks it is fully unused. +_ :: strings + +/* +Unmarshals the given CBOR into the given pointer using reflection. +Types that require allocation are allocated using the given allocator. 
+ +Some temporary allocations are done on the `context.temp_allocator`, but, if you want to, +this can be set to a "normal" allocator, because the necessary `delete` and `free` calls are still made. +This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end. +*/ +unmarshal :: proc { + unmarshal_from_reader, + unmarshal_from_string, +} + +// Unmarshals from a reader, see docs on the proc group `Unmarshal` for more info. +unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, allocator := context.allocator) -> Unmarshal_Error { + return _unmarshal_any_ptr(r, ptr, allocator=allocator) +} + +// Unmarshals from a string, see docs on the proc group `Unmarshal` for more info. +unmarshal_from_string :: proc(s: string, ptr: ^$T, allocator := context.allocator) -> Unmarshal_Error { + sr: strings.Reader + r := strings.to_reader(&sr, s) + return _unmarshal_any_ptr(r, ptr, allocator=allocator) +} + +_unmarshal_any_ptr :: proc(r: io.Reader, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator) -> Unmarshal_Error { + context.allocator = allocator + v := v + + if v == nil || v.id == nil { + return .Invalid_Parameter + } + + v = reflect.any_base(v) + ti := type_info_of(v.id) + if !reflect.is_pointer(ti) || ti.id == rawptr { + return .Non_Pointer_Parameter + } + + data := any{(^rawptr)(v.data)^, ti.variant.(reflect.Type_Info_Pointer).elem.id} + return _unmarshal_value(r, data, hdr.? or_else (_decode_header(r) or_return)) +} + +_unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_Error) { + v := v + ti := reflect.type_info_base(type_info_of(v.id)) + + // If it's a union with only one variant, then treat it as that variant + if u, ok := ti.variant.(reflect.Type_Info_Union); ok && len(u.variants) == 1 { + #partial switch hdr { + case .Nil, .Undefined, nil: // no-op. 
+ case: + variant := u.variants[0] + v.id = variant.id + ti = reflect.type_info_base(variant) + if !reflect.is_pointer_internally(variant) { + tag := any{rawptr(uintptr(v.data) + u.tag_offset), u.tag_type.id} + assert(_assign_int(tag, 1)) + } + } + } + + // Allow generic unmarshal by doing it into a `Value`. + switch &dst in v { + case Value: + dst = err_conv(decode(r, hdr)) or_return + return + } + + switch hdr { + case .U8: + decoded := _decode_u8(r) or_return + if !_assign_int(v, decoded) { return _unsupported(v, hdr) } + return + + case .U16: + decoded := _decode_u16(r) or_return + if !_assign_int(v, decoded) { return _unsupported(v, hdr) } + return + + case .U32: + decoded := _decode_u32(r) or_return + if !_assign_int(v, decoded) { return _unsupported(v, hdr) } + return + + case .U64: + decoded := _decode_u64(r) or_return + if !_assign_int(v, decoded) { return _unsupported(v, hdr) } + return + + case .Neg_U8: + decoded := Negative_U8(_decode_u8(r) or_return) + + switch &dst in v { + case Negative_U8: + dst = decoded + return + case Negative_U16: + dst = Negative_U16(decoded) + return + case Negative_U32: + dst = Negative_U32(decoded) + return + case Negative_U64: + dst = Negative_U64(decoded) + return + } + + if reflect.is_unsigned(ti) { return _unsupported(v, hdr) } + + if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) } + return + + case .Neg_U16: + decoded := Negative_U16(_decode_u16(r) or_return) + + switch &dst in v { + case Negative_U16: + dst = decoded + return + case Negative_U32: + dst = Negative_U32(decoded) + return + case Negative_U64: + dst = Negative_U64(decoded) + return + } + + if reflect.is_unsigned(ti) { return _unsupported(v, hdr) } + + if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) } + return + + case .Neg_U32: + decoded := Negative_U32(_decode_u32(r) or_return) + + switch &dst in v { + case Negative_U32: + dst = decoded + return + case Negative_U64: + dst = Negative_U64(decoded) + return 
+ } + + if reflect.is_unsigned(ti) { return _unsupported(v, hdr) } + + if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) } + return + + case .Neg_U64: + decoded := Negative_U64(_decode_u64(r) or_return) + + switch &dst in v { + case Negative_U64: + dst = decoded + return + } + + if reflect.is_unsigned(ti) { return _unsupported(v, hdr) } + + if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr) } + return + + case .Simple: + decoded := _decode_simple(r) or_return + + // NOTE: Because this is a special type and not to be treated as a general integer, + // We only put the value of it in fields that are explicitly of type `Simple`. + switch &dst in v { + case Simple: + dst = decoded + return + case: + return _unsupported(v, hdr) + } + + case .F16: + decoded := _decode_f16(r) or_return + if !_assign_float(v, decoded) { return _unsupported(v, hdr) } + return + + case .F32: + decoded := _decode_f32(r) or_return + if !_assign_float(v, decoded) { return _unsupported(v, hdr) } + return + + case .F64: + decoded := _decode_f64(r) or_return + if !_assign_float(v, decoded) { return _unsupported(v, hdr) } + return + + case .True: + if !_assign_bool(v, true) { return _unsupported(v, hdr) } + return + + case .False: + if !_assign_bool(v, false) { return _unsupported(v, hdr) } + return + + case .Nil, .Undefined: + mem.zero(v.data, ti.size) + return + + case .Break: + return .Break + } + + maj, add := _header_split(hdr) + switch maj { + case .Unsigned: + decoded := _decode_tiny_u8(add) or_return + if !_assign_int(v, decoded) { return _unsupported(v, hdr, add) } + return + + case .Negative: + decoded := Negative_U8(_decode_tiny_u8(add) or_return) + + switch &dst in v { + case Negative_U8: + dst = decoded + return + } + + if reflect.is_unsigned(ti) { return _unsupported(v, hdr, add) } + + if !_assign_int(v, negative_to_int(decoded)) { return _unsupported(v, hdr, add) } + return + + case .Other: + decoded := _decode_tiny_simple(add) 
or_return + + // NOTE: Because this is a special type and not to be treated as a general integer, + // We only put the value of it in fields that are explicitly of type `Simple`. + switch &dst in v { + case Simple: + dst = decoded + return + case: + return _unsupported(v, hdr, add) + } + + case .Tag: + switch &dst in v { + case ^Tag: + tval := err_conv(_decode_tag_ptr(r, add)) or_return + if t, is_tag := tval.(^Tag); is_tag { + dst = t + return + } + + destroy(tval) + return .Bad_Tag_Value + case Tag: + t := err_conv(_decode_tag(r, add)) or_return + if t, is_tag := t.?; is_tag { + dst = t + return + } + + return .Bad_Tag_Value + } + + nr := err_conv(_decode_tag_nr(r, add)) or_return + + // Custom tag implementations. + if impl, ok := _tag_implementations_nr[nr]; ok { + return impl->unmarshal(r, nr, v) + } else { + // Discard the tag info and unmarshal as its value. + return _unmarshal_value(r, v, _decode_header(r) or_return) + } + + return _unsupported(v, hdr, add) + + case .Bytes: return _unmarshal_bytes(r, v, ti, hdr, add) + case .Text: return _unmarshal_string(r, v, ti, hdr, add) + case .Array: return _unmarshal_array(r, v, ti, hdr, add) + case .Map: return _unmarshal_map(r, v, ti, hdr, add) + + case: return .Bad_Major + } +} + +_unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { + #partial switch t in ti.variant { + case reflect.Type_Info_String: + bytes := err_conv(_decode_bytes(r, add)) or_return + + if t.is_cstring { + raw := (^cstring)(v.data) + assert_safe_for_cstring(string(bytes)) + raw^ = cstring(raw_data(bytes)) + } else { + // String has same memory layout as a slice, so we can directly use it as a slice. 
+ raw := (^mem.Raw_String)(v.data) + raw^ = transmute(mem.Raw_String)bytes + } + + return + + case reflect.Type_Info_Slice: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + bytes := err_conv(_decode_bytes(r, add)) or_return + raw := (^mem.Raw_Slice)(v.data) + raw^ = transmute(mem.Raw_Slice)bytes + return + + case reflect.Type_Info_Dynamic_Array: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + bytes := err_conv(_decode_bytes(r, add)) or_return + raw := (^mem.Raw_Dynamic_Array)(v.data) + raw.data = raw_data(bytes) + raw.len = len(bytes) + raw.cap = len(bytes) + raw.allocator = context.allocator + return + + case reflect.Type_Info_Array: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + bytes: []byte; { + context.allocator = context.temp_allocator + bytes = err_conv(_decode_bytes(r, add)) or_return + } + defer delete(bytes, context.temp_allocator) + + if len(bytes) > t.count { return _unsupported(v, hdr) } + + // Copy into array type, delete original. + slice := ([^]byte)(v.data)[:len(bytes)] + n := copy(slice, bytes) + assert(n == len(bytes)) + return + } + + return _unsupported(v, hdr) +} + +_unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { + #partial switch t in ti.variant { + case reflect.Type_Info_String: + text := err_conv(_decode_text(r, add)) or_return + + if t.is_cstring { + raw := (^cstring)(v.data) + + assert_safe_for_cstring(text) + raw^ = cstring(raw_data(text)) + } else { + raw := (^string)(v.data) + raw^ = text + } + return + + // Enum by its variant name. 
+ case reflect.Type_Info_Enum: + context.allocator = context.temp_allocator + text := err_conv(_decode_text(r, add)) or_return + defer delete(text, context.temp_allocator) + + for name, i in t.names { + if name == text { + if !_assign_int(any{v.data, ti.id}, t.values[i]) { return _unsupported(v, hdr) } + return + } + } + + case reflect.Type_Info_Rune: + context.allocator = context.temp_allocator + text := err_conv(_decode_text(r, add)) or_return + defer delete(text, context.temp_allocator) + + r := (^rune)(v.data) + dr, n := utf8.decode_rune(text) + if dr == utf8.RUNE_ERROR || n < len(text) { + return _unsupported(v, hdr) + } + + r^ = dr + return + } + + return _unsupported(v, hdr) +} + +_unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { + + assign_array :: proc( + r: io.Reader, + da: ^mem.Raw_Dynamic_Array, + elemt: ^reflect.Type_Info, + _length: Maybe(int), + growable := true, + ) -> (out_of_space: bool, err: Unmarshal_Error) { + length, has_length := _length.? + for idx: uintptr = 0; !has_length || idx < uintptr(length); idx += 1 { + elem_ptr := rawptr(uintptr(da.data) + idx*uintptr(elemt.size)) + elem := any{elem_ptr, elemt.id} + + hdr := _decode_header(r) or_return + + // Double size if out of capacity. + if da.cap <= da.len { + // Not growable, error out. + if !growable { return true, .Out_Of_Memory } + + cap := 2 * da.cap + ok := runtime.__dynamic_array_reserve(da, elemt.size, elemt.align, cap) + + // NOTE: Might be lying here, but it is at least an allocator error. + if !ok { return false, .Out_Of_Memory } + } + + err = _unmarshal_value(r, elem, hdr) + if !has_length && err == .Break { break } + if err != nil { return } + + da.len += 1 + } + + return false, nil + } + + // Allow generically storing the values array. 
+ switch &dst in v { + case ^Array: + dst = err_conv(_decode_array_ptr(r, add)) or_return + return + case Array: + dst = err_conv(_decode_array(r, add)) or_return + return + } + + #partial switch t in ti.variant { + case reflect.Type_Info_Slice: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY + + data := mem.alloc_bytes_non_zeroed(t.elem.size * length, t.elem.align) or_return + defer if err != nil { mem.free_bytes(data) } + + da := mem.Raw_Dynamic_Array{raw_data(data), 0, length, context.allocator } + + assign_array(r, &da, t.elem, _length) or_return + + if da.len < da.cap { + // Ignoring an error here, but this is not critical to succeed. + _ = runtime.__dynamic_array_shrink(&da, t.elem.size, t.elem.align, da.len) + } + + raw := (^mem.Raw_Slice)(v.data) + raw.data = da.data + raw.len = da.len + return + + case reflect.Type_Info_Dynamic_Array: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY + + data := mem.alloc_bytes_non_zeroed(t.elem.size * length, t.elem.align) or_return + defer if err != nil { mem.free_bytes(data) } + + raw := (^mem.Raw_Dynamic_Array)(v.data) + raw.data = raw_data(data) + raw.len = 0 + raw.cap = length + raw.allocator = context.allocator + + _ = assign_array(r, raw, t.elem, _length) or_return + return + + case reflect.Type_Info_Array: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? 
or_else t.count + + if !unknown && length > t.count { + return _unsupported(v, hdr) + } + + da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, context.allocator } + + out_of_space := assign_array(r, &da, t.elem, _length, growable=false) or_return + if out_of_space { return _unsupported(v, hdr) } + return + + case reflect.Type_Info_Enumerated_Array: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? or_else t.count + + if !unknown && length > t.count { + return _unsupported(v, hdr) + } + + da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, context.allocator } + + out_of_space := assign_array(r, &da, t.elem, _length, growable=false) or_return + if out_of_space { return _unsupported(v, hdr) } + return + + case reflect.Type_Info_Complex: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? or_else 2 + + if !unknown && length > 2 { + return _unsupported(v, hdr) + } + + da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, 2, context.allocator } + + info: ^runtime.Type_Info + switch ti.id { + case complex32: info = type_info_of(f16) + case complex64: info = type_info_of(f32) + case complex128: info = type_info_of(f64) + case: unreachable() + } + + out_of_space := assign_array(r, &da, info, 2, growable=false) or_return + if out_of_space { return _unsupported(v, hdr) } + return + + case reflect.Type_Info_Quaternion: + _length, unknown := err_conv(_decode_container_length(r, add)) or_return + length := _length.? 
or_else 4 + + if !unknown && length > 4 { + return _unsupported(v, hdr) + } + + da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, 4, context.allocator } + + info: ^runtime.Type_Info + switch ti.id { + case quaternion64: info = type_info_of(f16) + case quaternion128: info = type_info_of(f32) + case quaternion256: info = type_info_of(f64) + case: unreachable() + } + + out_of_space := assign_array(r, &da, info, 4, growable=false) or_return + if out_of_space { return _unsupported(v, hdr) } + return + + case: return _unsupported(v, hdr) + } +} + +_unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { + + decode_key :: proc(r: io.Reader, v: any) -> (k: string, err: Unmarshal_Error) { + entry_hdr := _decode_header(r) or_return + entry_maj, entry_add := _header_split(entry_hdr) + #partial switch entry_maj { + case .Text: + k = err_conv(_decode_text(r, entry_add)) or_return + return + case .Bytes: + bytes := err_conv(_decode_bytes(r, entry_add)) or_return + k = string(bytes) + return + case: + err = _unsupported(v, entry_hdr) + return + } + } + + // Allow generically storing the map array. + switch &dst in v { + case ^Map: + dst = err_conv(_decode_map_ptr(r, add)) or_return + return + case Map: + dst = err_conv(_decode_map(r, add)) or_return + return + } + + #partial switch t in ti.variant { + case reflect.Type_Info_Struct: + if t.is_raw_union { + return _unsupported(v, hdr) + } + + length, unknown := err_conv(_decode_container_length(r, add)) or_return + fields := reflect.struct_fields_zipped(ti.id) + + for idx := 0; unknown || idx < length.?; idx += 1 { + // Decode key, keys can only be strings. + key: string; { + context.allocator = context.temp_allocator + if keyv, kerr := decode_key(r, v); unknown && kerr == .Break { + break + } else if kerr != nil { + err = kerr + return + } else { + key = keyv + } + } + defer delete(key, context.temp_allocator) + + // Find matching field. 
+ use_field_idx := -1 + { + for field, field_idx in fields { + tag_value := string(reflect.struct_tag_get(field.tag, "cbor")) + if key == tag_value { + use_field_idx = field_idx + break + } + + if key == field.name { + // No break because we want to still check remaining struct tags. + use_field_idx = field_idx + } + } + + // Skips unused map entries. + if use_field_idx < 0 { + continue + } + } + + field := fields[use_field_idx] + name := field.name + ptr := rawptr(uintptr(v.data) + field.offset) + fany := any{ptr, field.type.id} + _unmarshal_value(r, fany, _decode_header(r) or_return) or_return + } + return + + case reflect.Type_Info_Map: + if !reflect.is_string(t.key) { + return _unsupported(v, hdr) + } + + raw_map := (^mem.Raw_Map)(v.data) + if raw_map.allocator.procedure == nil { + raw_map.allocator = context.allocator + } + + defer if err != nil { + _ = runtime.map_free_dynamic(raw_map^, t.map_info) + } + + length, unknown := err_conv(_decode_container_length(r, add)) or_return + if !unknown { + // Reserve space before setting so we can return allocation errors and be efficient on big maps. + new_len := uintptr(runtime.map_len(raw_map^)+length.?) + runtime.map_reserve_dynamic(raw_map, t.map_info, new_len) or_return + } + + // Temporary memory to unmarshal keys into before inserting them into the map. + elem_backing := mem.alloc_bytes_non_zeroed(t.value.size, t.value.align, context.temp_allocator) or_return + defer delete(elem_backing, context.temp_allocator) + + map_backing_value := any{raw_data(elem_backing), t.value.id} + + for idx := 0; unknown || idx < length.?; idx += 1 { + // Decode key, keys can only be strings. + key: string + if keyv, kerr := decode_key(r, v); unknown && kerr == .Break { + break + } else if kerr != nil { + err = kerr + return + } else { + key = keyv + } + + if unknown { + // Reserve space for new element so we can return allocator errors. 
+ new_len := uintptr(runtime.map_len(raw_map^)+1) + runtime.map_reserve_dynamic(raw_map, t.map_info, new_len) or_return + } + + mem.zero_slice(elem_backing) + _unmarshal_value(r, map_backing_value, _decode_header(r) or_return) or_return + + key_ptr := rawptr(&key) + key_cstr: cstring + if reflect.is_cstring(t.key) { + assert_safe_for_cstring(key) + key_cstr = cstring(raw_data(key)) + key_ptr = &key_cstr + } + + set_ptr := runtime.__dynamic_map_set_without_hash(raw_map, t.map_info, key_ptr, map_backing_value.data) + // We already reserved space for it, so this shouldn't fail. + assert(set_ptr != nil) + } + return + + case: + return _unsupported(v, hdr) + } +} + +_assign_int :: proc(val: any, i: $T) -> bool { + v := reflect.any_core(val) + + // NOTE: should under/over flow be checked here? `encoding/json` doesn't, but maybe that is a + // less strict encoding?. + + switch &dst in v { + case i8: dst = i8 (i) + case i16: dst = i16 (i) + case i16le: dst = i16le (i) + case i16be: dst = i16be (i) + case i32: dst = i32 (i) + case i32le: dst = i32le (i) + case i32be: dst = i32be (i) + case i64: dst = i64 (i) + case i64le: dst = i64le (i) + case i64be: dst = i64be (i) + case i128: dst = i128 (i) + case i128le: dst = i128le (i) + case i128be: dst = i128be (i) + case u8: dst = u8 (i) + case u16: dst = u16 (i) + case u16le: dst = u16le (i) + case u16be: dst = u16be (i) + case u32: dst = u32 (i) + case u32le: dst = u32le (i) + case u32be: dst = u32be (i) + case u64: dst = u64 (i) + case u64le: dst = u64le (i) + case u64be: dst = u64be (i) + case u128: dst = u128 (i) + case u128le: dst = u128le (i) + case u128be: dst = u128be (i) + case int: dst = int (i) + case uint: dst = uint (i) + case uintptr: dst = uintptr(i) + case: + ti := type_info_of(v.id) + do_byte_swap := is_bit_set_different_endian_to_platform(ti) + #partial switch info in ti.variant { + case runtime.Type_Info_Bit_Set: + switch ti.size * 8 { + case 0: + case 8: + x := (^u8)(v.data) + x^ = u8(i) + case 16: + x := 
(^u16)(v.data) + x^ = do_byte_swap ? intrinsics.byte_swap(u16(i)) : u16(i) + case 32: + x := (^u32)(v.data) + x^ = do_byte_swap ? intrinsics.byte_swap(u32(i)) : u32(i) + case 64: + x := (^u64)(v.data) + x^ = do_byte_swap ? intrinsics.byte_swap(u64(i)) : u64(i) + case: + panic("unknown bit_size size") + } + case: + return false + } + } + return true +} + +_assign_float :: proc(val: any, f: $T) -> bool { + v := reflect.any_core(val) + + // NOTE: should under/over flow be checked here? `encoding/json` doesn't, but maybe that is a + // less strict encoding?. + + switch &dst in v { + case f16: dst = f16 (f) + case f16le: dst = f16le(f) + case f16be: dst = f16be(f) + case f32: dst = f32 (f) + case f32le: dst = f32le(f) + case f32be: dst = f32be(f) + case f64: dst = f64 (f) + case f64le: dst = f64le(f) + case f64be: dst = f64be(f) + + case complex32: dst = complex(f16(f), 0) + case complex64: dst = complex(f32(f), 0) + case complex128: dst = complex(f64(f), 0) + + case quaternion64: dst = quaternion(f16(f), 0, 0, 0) + case quaternion128: dst = quaternion(f32(f), 0, 0, 0) + case quaternion256: dst = quaternion(f64(f), 0, 0, 0) + + case: return false + } + return true +} + +_assign_bool :: proc(val: any, b: bool) -> bool { + v := reflect.any_core(val) + switch &dst in v { + case bool: dst = bool(b) + case b8: dst = b8 (b) + case b16: dst = b16 (b) + case b32: dst = b32 (b) + case b64: dst = b64 (b) + case: return false + } + return true +} + +// Sanity check that the decoder added a nil byte to the end. 
+@(private, disabled=ODIN_DISABLE_ASSERT) +assert_safe_for_cstring :: proc(s: string, loc := #caller_location) { + assert(([^]byte)(raw_data(s))[len(s)] == 0, loc = loc) +} diff --git a/core/io/io.odin b/core/io/io.odin index ea8e240b0..961dbe43e 100644 --- a/core/io/io.odin +++ b/core/io/io.odin @@ -29,7 +29,7 @@ Error :: enum i32 { // Invalid_Write means that a write returned an impossible count Invalid_Write, - // Short_Buffer means that a read required a longer buffer than was provided + // Short_Buffer means that a read/write required a longer buffer than was provided Short_Buffer, // No_Progress is returned by some implementations of `io.Reader` when many calls @@ -359,6 +359,29 @@ read_at_least :: proc(r: Reader, buf: []byte, min: int) -> (n: int, err: Error) return } +// write_full writes until the entire contents of `buf` has been written or an error occurs. +write_full :: proc(w: Writer, buf: []byte) -> (n: int, err: Error) { + return write_at_least(w, buf, len(buf)) +} + +// write_at_least writes at least `buf[:min]` to the writer and returns the amount written. +// If an error occurs before writing everything it is returned. +write_at_least :: proc(w: Writer, buf: []byte, min: int) -> (n: int, err: Error) { + if len(buf) < min { + return 0, .Short_Buffer + } + for n < min && err == nil { + nn: int + nn, err = write(w, buf[n:]) + n += nn + } + + if err == nil && n < min { + err = .Short_Write + } + return +} + // copy copies from src to dst till either EOF is reached on src or an error occurs // It returns the number of bytes copied and the first error that occurred whilst copying, if any. 
copy :: proc(dst: Writer, src: Reader) -> (written: i64, err: Error) { diff --git a/core/net/common.odin b/core/net/common.odin index 2a6f44602..3cd1459a6 100644 --- a/core/net/common.odin +++ b/core/net/common.odin @@ -413,4 +413,5 @@ DNS_Record_Header :: struct #packed { DNS_Host_Entry :: struct { name: string, addr: Address, -} \ No newline at end of file +} + diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin index fff344b22..22374f3b5 100644 --- a/examples/all/all_main.odin +++ b/examples/all/all_main.odin @@ -53,6 +53,7 @@ import json "core:encoding/json" import varint "core:encoding/varint" import xml "core:encoding/xml" import endian "core:encoding/endian" +import cbor "core:encoding/cbor" import fmt "core:fmt" import hash "core:hash" @@ -167,6 +168,7 @@ _ :: json _ :: varint _ :: xml _ :: endian +_ :: cbor _ :: fmt _ :: hash _ :: xxhash diff --git a/tests/core/Makefile b/tests/core/Makefile index 1207eeec5..1fca7bf97 100644 --- a/tests/core/Makefile +++ b/tests/core/Makefile @@ -55,6 +55,7 @@ encoding_test: $(ODIN) run encoding/json $(COMMON) -out:test_json $(ODIN) run encoding/varint $(COMMON) -out:test_varint $(ODIN) run encoding/xml $(COMMON) -out:test_xml + $(ODIN) run encoding/cbor $(COMMON) -out:test_cbor math_test: $(ODIN) run math $(COMMON) $(COLLECTION) -out:test_core_math diff --git a/tests/core/build.bat b/tests/core/build.bat index d5f528f0c..5bf8e1ead 100644 --- a/tests/core/build.bat +++ b/tests/core/build.bat @@ -40,6 +40,7 @@ rem %PATH_TO_ODIN% run encoding/hxa %COMMON% %COLLECTION% -out:test_hxa.exe | %PATH_TO_ODIN% run encoding/json %COMMON% -out:test_json.exe || exit /b %PATH_TO_ODIN% run encoding/varint %COMMON% -out:test_varint.exe || exit /b %PATH_TO_ODIN% run encoding/xml %COMMON% -out:test_xml.exe || exit /b +%PATH_TO_ODIN% test encoding/cbor %COMMON% -out:test_cbor.exe || exit /b echo --- echo Running core:math/noise tests diff --git a/tests/core/encoding/cbor/test_core_cbor.odin 
b/tests/core/encoding/cbor/test_core_cbor.odin new file mode 100644 index 000000000..22359d830 --- /dev/null +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -0,0 +1,719 @@ +package test_encoding_cbor + +import "core:bytes" +import "core:encoding/cbor" +import "core:fmt" +import "core:intrinsics" +import "core:math/big" +import "core:mem" +import "core:reflect" +import "core:testing" +import "core:time" + +Foo :: struct { + str: string, + cstr: cstring, + value: cbor.Value, + neg: cbor.Negative_U16, + pos: u16, + iamint: int, + base64: string `cbor_tag:"base64"`, + renamed: f32 `cbor:"renamed :)"`, + now: time.Time `cbor_tag:"1"`, + nowie: time.Time, + child: struct{ + dyn: [dynamic]string, + mappy: map[string]int, + my_integers: [10]int, + }, + my_bytes: []byte, + ennie: FooBar, + ennieb: FooBars, + quat: quaternion64, + comp: complex128, + important: rune, + no: cbor.Nil, + nos: cbor.Undefined, + yes: b32, + biggie: u64, + smallie: cbor.Negative_U64, + onetwenty: i128, + small_onetwenty: i128, + biggest: big.Int, + smallest: big.Int, +} + +FooBar :: enum { + EFoo, + EBar, +} + +FooBars :: bit_set[FooBar; u16] + +@(test) +test_marshalling :: proc(t: ^testing.T) { + tracker: mem.Tracking_Allocator + mem.tracking_allocator_init(&tracker, context.allocator) + context.allocator = mem.tracking_allocator(&tracker) + context.temp_allocator = context.allocator + defer mem.tracking_allocator_destroy(&tracker) + + ev :: testing.expect_value + + { + nice := "16 is a nice number" + now := time.Time{_nsec = 1701117968 * 1e9} + f: Foo = { + str = "Hellope", + cstr = "Hellnope", + value = &cbor.Map{{u8(16), &nice}, {u8(32), u8(69)}}, + neg = 68, + pos = 1212, + iamint = -256, + base64 = nice, + renamed = 123123.125, + + now = now, + nowie = now, + + child = { + dyn = [dynamic]string{"one", "two", "three", "four"}, + mappy = map[string]int{"one" = 1, "two" = 2, "three" = 3, "four" = 4}, + my_integers = [10]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + }, + + my_bytes = []byte{}, + + 
ennie = .EFoo, + ennieb = {.EBar}, + + quat = quaternion(16, 17, 18, 19), + comp = complex(32, 33), + + important = '!', + + no = cbor.Nil(uintptr(3)), + + yes = true, + + biggie = max(u64), + smallie = cbor.Negative_U64(max(u64)), + onetwenty = i128(12345), + small_onetwenty = -i128(max(u64)), + } + + big.atoi(&f.biggest, "1234567891011121314151617181920") + big.atoi(&f.smallest, "-1234567891011121314151617181920") + + defer { + delete(f.child.dyn) + delete(f.child.mappy) + big.destroy(&f.biggest) + big.destroy(&f.smallest) + } + + data, err := cbor.marshal(f, cbor.ENCODE_FULLY_DETERMINISTIC) + ev(t, err, nil) + defer delete(data) + + decoded, derr := cbor.decode_string(string(data)) + ev(t, derr, nil) + defer cbor.destroy(decoded) + + diagnosis, eerr := cbor.diagnose(decoded) + ev(t, eerr, nil) + defer delete(diagnosis) + + ev(t, diagnosis, `{ + "base64": 34("MTYgaXMgYSBuaWNlIG51bWJlcg=="), + "biggest": 2(h'f951a9fd3c158afdff08ab8e0'), + "biggie": 18446744073709551615, + "child": { + "dyn": [ + "one", + "two", + "three", + "four" + ], + "mappy": { + "one": 1, + "two": 2, + "four": 4, + "three": 3 + }, + "my_integers": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10 + ] + }, + "comp": [ + 32.0000, + 33.0000 + ], + "cstr": "Hellnope", + "ennie": 0, + "ennieb": 2, + "iamint": -256, + "important": "!", + "my_bytes": h'', + "neg": -69, + "no": nil, + "nos": undefined, + "now": 1(1701117968), + "nowie": { + "_nsec": 1701117968000000000 + }, + "onetwenty": 12345, + "pos": 1212, + "quat": [ + 17.0000, + 18.0000, + 19.0000, + 16.0000 + ], + "renamed :)": 123123.12500000, + "small_onetwenty": -18446744073709551615, + "smallest": 3(h'f951a9fd3c158afdff08ab8e0'), + "smallie": -18446744073709551616, + "str": "Hellope", + "value": { + 16: "16 is a nice number", + 32: 69 + }, + "yes": true +}`) + + backf: Foo + uerr := cbor.unmarshal(string(data), &backf) + ev(t, uerr, nil) + defer { + delete(backf.str) + delete(backf.cstr) + cbor.destroy(backf.value) + delete(backf.base64) 
+ + for e in backf.child.dyn { delete(e) } + delete(backf.child.dyn) + + for k in backf.child.mappy { delete(k) } + delete(backf.child.mappy) + + delete(backf.my_bytes) + + big.destroy(&backf.biggest) + big.destroy(&backf.smallest) + } + + ev(t, backf.str, f.str) + ev(t, backf.cstr, f.cstr) + + #partial switch v in backf.value { + case ^cbor.Map: + for entry, i in v { + fm := f.value.(^cbor.Map) + ev(t, entry.key, fm[i].key) + + if str, is_str := entry.value.(^cbor.Text); is_str { + ev(t, str^, fm[i].value.(^cbor.Text)^) + } else { + ev(t, entry.value, fm[i].value) + } + } + + case: testing.error(t, v) + } + + ev(t, backf.neg, f.neg) + ev(t, backf.iamint, f.iamint) + ev(t, backf.base64, f.base64) + ev(t, backf.renamed, f.renamed) + ev(t, backf.now, f.now) + ev(t, backf.nowie, f.nowie) + for e, i in f.child.dyn { ev(t, backf.child.dyn[i], e) } + for key, value in f.child.mappy { ev(t, backf.child.mappy[key], value) } + ev(t, backf.child.my_integers, f.child.my_integers) + ev(t, len(backf.my_bytes), 0) + ev(t, len(backf.my_bytes), len(f.my_bytes)) + ev(t, backf.ennie, f.ennie) + ev(t, backf.ennieb, f.ennieb) + ev(t, backf.quat, f.quat) + ev(t, backf.comp, f.comp) + ev(t, backf.important, f.important) + ev(t, backf.no, nil) + ev(t, backf.nos, nil) + ev(t, backf.yes, f.yes) + ev(t, backf.biggie, f.biggie) + ev(t, backf.smallie, f.smallie) + ev(t, backf.onetwenty, f.onetwenty) + ev(t, backf.small_onetwenty, f.small_onetwenty) + + s_equals, s_err := big.equals(&backf.smallest, &f.smallest) + ev(t, s_err, nil) + if !s_equals { + testing.errorf(t, "smallest: %v does not equal %v", big.itoa(&backf.smallest), big.itoa(&f.smallest)) + } + + b_equals, b_err := big.equals(&backf.biggest, &f.biggest) + ev(t, b_err, nil) + if !b_equals { + testing.errorf(t, "biggest: %v does not equal %v", big.itoa(&backf.biggest), big.itoa(&f.biggest)) + } + } + + for _, leak in tracker.allocation_map { + testing.errorf(t, "%v leaked %m\n", leak.location, leak.size) + } + + for bad_free in 
tracker.bad_free_array { + testing.errorf(t, "%v allocation %p was freed badly\n", bad_free.location, bad_free.memory) + } +} + +@(test) +test_decode_unsigned :: proc(t: ^testing.T) { + expect_decoding(t, "\x00", "0", u8) + expect_decoding(t, "\x01", "1", u8) + expect_decoding(t, "\x0a", "10", u8) + expect_decoding(t, "\x17", "23", u8) + expect_decoding(t, "\x18\x18", "24", u8) + expect_decoding(t, "\x18\x19", "25", u8) + expect_decoding(t, "\x18\x64", "100", u8) + expect_decoding(t, "\x19\x03\xe8", "1000", u16) + expect_decoding(t, "\x1a\x00\x0f\x42\x40", "1000000", u32) // Million. + expect_decoding(t, "\x1b\x00\x00\x00\xe8\xd4\xa5\x10\x00", "1000000000000", u64) // Trillion. + expect_decoding(t, "\x1b\xff\xff\xff\xff\xff\xff\xff\xff", "18446744073709551615", u64) // max(u64). +} + +@(test) +test_encode_unsigned :: proc(t: ^testing.T) { + expect_encoding(t, u8(0), "\x00") + expect_encoding(t, u8(1), "\x01") + expect_encoding(t, u8(10), "\x0a") + expect_encoding(t, u8(23), "\x17") + expect_encoding(t, u8(24), "\x18\x18") + expect_encoding(t, u8(25), "\x18\x19") + expect_encoding(t, u8(100), "\x18\x64") + expect_encoding(t, u16(1000), "\x19\x03\xe8") + expect_encoding(t, u32(1000000), "\x1a\x00\x0f\x42\x40") // Million. + expect_encoding(t, u64(1000000000000), "\x1b\x00\x00\x00\xe8\xd4\xa5\x10\x00") // Trillion. + expect_encoding(t, u64(18446744073709551615), "\x1b\xff\xff\xff\xff\xff\xff\xff\xff") // max(u64). +} + +@(test) +test_decode_negative :: proc(t: ^testing.T) { + expect_decoding(t, "\x20", "-1", cbor.Negative_U8) + expect_decoding(t, "\x29", "-10", cbor.Negative_U8) + expect_decoding(t, "\x38\x63", "-100", cbor.Negative_U8) + expect_decoding(t, "\x39\x03\xe7", "-1000", cbor.Negative_U16) + + // Negative max(u64). 
+ expect_decoding(t, "\x3b\xff\xff\xff\xff\xff\xff\xff\xff", "-18446744073709551616", cbor.Negative_U64) +} + +@(test) +test_encode_negative :: proc(t: ^testing.T) { + expect_encoding(t, cbor.Negative_U8(0), "\x20") + expect_encoding(t, cbor.Negative_U8(9), "\x29") + expect_encoding(t, cbor.Negative_U8(99), "\x38\x63") + expect_encoding(t, cbor.Negative_U16(999), "\x39\x03\xe7") + + // Negative max(u64). + expect_encoding(t, cbor.Negative_U64(18446744073709551615), "\x3b\xff\xff\xff\xff\xff\xff\xff\xff") +} + +@(test) +test_decode_simples :: proc(t: ^testing.T) { + expect_decoding(t, "\xf4", "false", bool) + expect_decoding(t, "\xf5", "true", bool) + expect_decoding(t, "\xf6", "nil", cbor.Nil) + expect_decoding(t, "\xf7", "undefined", cbor.Undefined) + + expect_decoding(t, "\xf0", "simple(16)", cbor.Simple) + expect_decoding(t, "\xf8\xff", "simple(255)", cbor.Atom) +} + +@(test) +test_encode_simples :: proc(t: ^testing.T) { + expect_encoding(t, bool(false), "\xf4") + expect_encoding(t, bool(true), "\xf5") + expect_encoding(t, cbor.Nil{}, "\xf6") // default value for a distinct rawptr, in this case Nil. + expect_encoding(t, cbor.Undefined{}, "\xf7") // default value for a distinct rawptr, in this case Undefined. 
+ + expect_encoding(t, cbor.Simple(16), "\xf0") // simple(16) + expect_encoding(t, cbor.Simple(255), "\xf8\xff") // simple(255) +} + +@(test) +test_decode_floats :: proc(t: ^testing.T) { + expect_float(t, "\xf9\x00\x00", f16(0.0)) + expect_float(t, "\xf9\x80\x00", f16(-0.0)) + expect_float(t, "\xf9\x3c\x00", f16(1.0)) + expect_float(t, "\xfb\x3f\xf1\x99\x99\x99\x99\x99\x9a", f64(1.1)) + expect_float(t, "\xf9\x3e\x00", f16(1.5)) + expect_float(t, "\xf9\x7b\xff", f16(65504.0)) + expect_float(t, "\xfa\x47\xc3\x50\x00", f32(100000.0)) + expect_float(t, "\xfa\x7f\x7f\xff\xff", f32(3.4028234663852886e+38)) + expect_float(t, "\xfb\x7e\x37\xe4\x3c\x88\x00\x75\x9c", f64(1.0e+300)) + expect_float(t, "\xf9\x00\x01", f16(5.960464477539063e-8)) + expect_float(t, "\xf9\x04\x00", f16(0.00006103515625)) + expect_float(t, "\xf9\xc4\x00", f16(-4.0)) + expect_float(t, "\xfb\xc0\x10\x66\x66\x66\x66\x66\x66", f64(-4.1)) + expect_decoding(t, "\xf9\x7c\x00", "+Inf", f16) + expect_decoding(t, "\xf9\x7e\x00", "NaN", f16) + expect_decoding(t, "\xf9\xfc\x00", "-Inf", f16) + expect_decoding(t, "\xfa\x7f\x80\x00\x00", "+Inf", f32) + expect_decoding(t, "\xfa\x7f\xc0\x00\x00", "NaN", f32) + expect_decoding(t, "\xfa\xff\x80\x00\x00", "-Inf", f32) + expect_decoding(t, "\xfb\x7f\xf0\x00\x00\x00\x00\x00\x00", "+Inf", f64) + expect_decoding(t, "\xfb\x7f\xf8\x00\x00\x00\x00\x00\x00", "NaN", f64) + expect_decoding(t, "\xfb\xff\xf0\x00\x00\x00\x00\x00\x00", "-Inf", f64) +} + +@(test) +test_encode_floats :: proc(t: ^testing.T) { + expect_encoding(t, f16(0.0), "\xf9\x00\x00") + expect_encoding(t, f16(-0.0), "\xf9\x80\x00") + expect_encoding(t, f16(1.0), "\xf9\x3c\x00") + expect_encoding(t, f64(1.1), "\xfb\x3f\xf1\x99\x99\x99\x99\x99\x9a") + expect_encoding(t, f16(1.5), "\xf9\x3e\x00") + expect_encoding(t, f16(65504.0), "\xf9\x7b\xff") + expect_encoding(t, f32(100000.0), "\xfa\x47\xc3\x50\x00") + expect_encoding(t, f32(3.4028234663852886e+38), "\xfa\x7f\x7f\xff\xff") + expect_encoding(t, f64(1.0e+300), 
"\xfb\x7e\x37\xe4\x3c\x88\x00\x75\x9c") + expect_encoding(t, f16(5.960464477539063e-8), "\xf9\x00\x01") + expect_encoding(t, f16(0.00006103515625), "\xf9\x04\x00") + expect_encoding(t, f16(-4.0), "\xf9\xc4\x00") + expect_encoding(t, f64(-4.1), "\xfb\xc0\x10\x66\x66\x66\x66\x66\x66") +} + +@(test) +test_decode_bytes :: proc(t: ^testing.T) { + expect_decoding(t, "\x40", "h''", ^cbor.Bytes) + expect_decoding(t, "\x44\x01\x02\x03\x04", "h'1234'", ^cbor.Bytes) + + // Indefinite lengths + + expect_decoding(t, "\x5f\x42\x01\x02\x43\x03\x04\x05\xff", "h'12345'", ^cbor.Bytes) +} + +@(test) +test_encode_bytes :: proc(t: ^testing.T) { + expect_encoding(t, &cbor.Bytes{}, "\x40") + expect_encoding(t, &cbor.Bytes{1, 2, 3, 4}, "\x44\x01\x02\x03\x04") + + // Indefinite lengths + + expect_streamed_encoding(t, "\x5f\x42\x01\x02\x43\x03\x04\x05\xff", &cbor.Bytes{1, 2}, &cbor.Bytes{3, 4, 5}) +} + +@(test) +test_decode_strings :: proc(t: ^testing.T) { + expect_decoding(t, "\x60", `""`, ^cbor.Text) + expect_decoding(t, "\x61\x61", `"a"`, ^cbor.Text) + expect_decoding(t, "\x64\x49\x45\x54\x46", `"IETF"`, ^cbor.Text) + expect_decoding(t, "\x62\x22\x5c", `""\"`, ^cbor.Text) + expect_decoding(t, "\x62\xc3\xbc", `"ü"`, ^cbor.Text) + expect_decoding(t, "\x63\xe6\xb0\xb4", `"水"`, ^cbor.Text) + expect_decoding(t, "\x64\xf0\x90\x85\x91", `"𐅑"`, ^cbor.Text) + + // Indefinite lengths + + expect_decoding(t, "\x7f\x65\x73\x74\x72\x65\x61\x64\x6d\x69\x6e\x67\xff", `"streaming"`, ^cbor.Text) +} + +@(test) +test_encode_strings :: proc(t: ^testing.T) { + expect_encoding(t, &cbor.Text{}, "\x60") + + a := "a" + expect_encoding(t, &a, "\x61\x61") + + b := "IETF" + expect_encoding(t, &b, "\x64\x49\x45\x54\x46") + + c := "\"\\" + expect_encoding(t, &c, "\x62\x22\x5c") + + d := "ü" + expect_encoding(t, &d, "\x62\xc3\xbc") + + e := "水" + expect_encoding(t, &e, "\x63\xe6\xb0\xb4") + + f := "𐅑" + expect_encoding(t, &f, "\x64\xf0\x90\x85\x91") + + // Indefinite lengths + + sa := "strea" + sb := "ming" + 
expect_streamed_encoding(t, "\x7f\x65\x73\x74\x72\x65\x61\x64\x6d\x69\x6e\x67\xff", &sa, &sb) +} + +@(test) +test_decode_lists :: proc(t: ^testing.T) { + expect_decoding(t, "\x80", "[]", ^cbor.Array) + expect_decoding(t, "\x83\x01\x02\x03", "[1, 2, 3]", ^cbor.Array) + expect_decoding(t, "\x83\x01\x82\x02\x03\x82\x04\x05", "[1, [2, 3], [4, 5]]", ^cbor.Array) + expect_decoding(t, "\x98\x19\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x18\x18\x19", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]", ^cbor.Array) + expect_decoding(t, "\x82\x61\x61\xa1\x61\x62\x61\x63", `["a", {"b": "c"}]`, ^cbor.Array) + + // Indefinite lengths + + expect_decoding(t, "\x9f\xff", "[]", ^cbor.Array) + expect_decoding(t, "\x9f\x01\x82\x02\x03\x9f\x04\x05\xff\xff", "[1, [2, 3], [4, 5]]", ^cbor.Array) + expect_decoding(t, "\x9f\x01\x82\x02\x03\x82\x04\x05\xff", "[1, [2, 3], [4, 5]]", ^cbor.Array) + expect_decoding(t, "\x83\x01\x82\x02\x03\x9f\x04\x05\xff", "[1, [2, 3], [4, 5]]", ^cbor.Array) + expect_decoding(t, "\x83\x01\x9f\x02\x03\xff\x82\x04\x05", "[1, [2, 3], [4, 5]]", ^cbor.Array) + expect_decoding(t, "\x9f\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x18\x18\x19\xff", "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]", ^cbor.Array) + expect_decoding(t, "\x82\x61\x61\xbf\x61\x62\x61\x63\xff", `["a", {"b": "c"}]`, ^cbor.Array) +} + +@(test) +test_encode_lists :: proc(t: ^testing.T) { + expect_encoding(t, &cbor.Array{}, "\x80") + expect_encoding(t, &cbor.Array{u8(1), u8(2), u8(3)}, "\x83\x01\x02\x03") + expect_encoding(t, &cbor.Array{u8(1), &cbor.Array{u8(2), u8(3)}, &cbor.Array{u8(4), u8(5)}}, "\x83\x01\x82\x02\x03\x82\x04\x05") + expect_encoding(t, &cbor.Array{u8(1), u8(2), u8(3), u8(4), u8(5), u8(6), u8(7), u8(8), u8(9), u8(10), u8(11), u8(12), u8(13), u8(14), u8(15), u8(16), u8(17), u8(18), u8(19), 
u8(20), u8(21), u8(22), u8(23), u8(24), u8(25)}, "\x98\x19\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x18\x18\x19") + + { + a := "a" + b := "b" + c := "c" + expect_encoding(t, &cbor.Array{&a, &cbor.Map{{&b, &c}}}, "\x82\x61\x61\xa1\x61\x62\x61\x63") + } + + // Indefinite lengths + + expect_streamed_encoding(t, "\x9f\xff", &cbor.Array{}) + + { + bytes.buffer_reset(&buf) + + err: cbor.Encode_Error + err = cbor.encode_stream_begin(stream, .Array) + testing.expect_value(t, err, nil) + + { + err = cbor.encode_stream_array_item(encoder, u8(1)) + testing.expect_value(t, err, nil) + + err = cbor.encode_stream_array_item(encoder, &cbor.Array{u8(2), u8(3)}) + testing.expect_value(t, err, nil) + + err = cbor.encode_stream_begin(stream, .Array) + testing.expect_value(t, err, nil) + + { + err = cbor.encode_stream_array_item(encoder, u8(4)) + testing.expect_value(t, err, nil) + + err = cbor.encode_stream_array_item(encoder, u8(5)) + testing.expect_value(t, err, nil) + } + + err = cbor.encode_stream_end(stream) + testing.expect_value(t, err, nil) + } + + err = cbor.encode_stream_end(stream) + testing.expect_value(t, err, nil) + + testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x9f\x01\x82\x02\x03\x9f\x04\x05\xff\xff"))) + } + + { + bytes.buffer_reset(&buf) + + err: cbor.Encode_Error + err = cbor._encode_u8(stream, 2, .Array) + testing.expect_value(t, err, nil) + + a := "a" + err = cbor.encode(encoder, &a) + testing.expect_value(t, err, nil) + + { + err = cbor.encode_stream_begin(stream, .Map) + testing.expect_value(t, err, nil) + + b := "b" + c := "c" + err = cbor.encode_stream_map_entry(encoder, &b, &c) + testing.expect_value(t, err, nil) + + err = cbor.encode_stream_end(stream) + testing.expect_value(t, err, nil) + } + + testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x82\x61\x61\xbf\x61\x62\x61\x63\xff"))) + } +} + 
+@(test) +test_decode_maps :: proc(t: ^testing.T) { + expect_decoding(t, "\xa0", "{}", ^cbor.Map) + expect_decoding(t, "\xa2\x01\x02\x03\x04", "{1: 2, 3: 4}", ^cbor.Map) + expect_decoding(t, "\xa2\x61\x61\x01\x61\x62\x82\x02\x03", `{"a": 1, "b": [2, 3]}`, ^cbor.Map) + expect_decoding(t, "\xa5\x61\x61\x61\x41\x61\x62\x61\x42\x61\x63\x61\x43\x61\x64\x61\x44\x61\x65\x61\x45", `{"a": "A", "b": "B", "c": "C", "d": "D", "e": "E"}`, ^cbor.Map) + + // Indefinite lengths + + expect_decoding(t, "\xbf\x61\x61\x01\x61\x62\x9f\x02\x03\xff\xff", `{"a": 1, "b": [2, 3]}`, ^cbor.Map) + expect_decoding(t, "\xbf\x63\x46\x75\x6e\xf5\x63\x41\x6d\x74\x21\xff", `{"Fun": true, "Amt": -2}`, ^cbor.Map) +} + +@(test) +test_encode_maps :: proc(t: ^testing.T) { + expect_encoding(t, &cbor.Map{}, "\xa0") + expect_encoding(t, &cbor.Map{{u8(1), u8(2)}, {u8(3), u8(4)}}, "\xa2\x01\x02\x03\x04") + + a := "a" + b := "b" + // NOTE: also tests the deterministic nature because it has to swap/sort the entries. + expect_encoding(t, &cbor.Map{{&b, &cbor.Array{u8(2), u8(3)}}, {&a, u8(1)}}, "\xa2\x61\x61\x01\x61\x62\x82\x02\x03") + + fun := "Fun" + amt := "Amt" + expect_streamed_encoding(t, "\xbf\x63\x46\x75\x6e\xf5\x63\x41\x6d\x74\x21\xff", &cbor.Map{{&fun, true}, {&amt, cbor.Negative_U8(1)}}) +} + +@(test) +test_decode_tags :: proc(t: ^testing.T) { + // Tag number 2 (unsigned bignumber), value bytes, max(u64) + 1. + expect_tag(t, "\xc2\x49\x01\x00\x00\x00\x00\x00\x00\x00\x00", cbor.TAG_UNSIGNED_BIG_NR, "2(h'100000000')") + + // Tag number 3 (negative bignumber), value bytes, negative max(u64) - 1. 
+ expect_tag(t, "\xc3\x49\x01\x00\x00\x00\x00\x00\x00\x00\x00", cbor.TAG_NEGATIVE_BIG_NR, "3(h'100000000')") + + expect_tag(t, "\xc1\x1a\x51\x4b\x67\xb0", cbor.TAG_EPOCH_TIME_NR, "1(1363896240)") + expect_tag(t, "\xc1\xfb\x41\xd4\x52\xd9\xec\x20\x00\x00", cbor.TAG_EPOCH_TIME_NR, "1(1363896240.5000000000000000)") + expect_tag(t, "\xd8\x18\x45\x64\x49\x45\x54\x46", cbor.TAG_CBOR_NR, "24(h'6449455446')") +} + +@(test) +test_encode_tags :: proc(t: ^testing.T) { + expect_encoding(t, &cbor.Tag{cbor.TAG_UNSIGNED_BIG_NR, &cbor.Bytes{1, 0, 0, 0, 0, 0, 0, 0, 0}}, "\xc2\x49\x01\x00\x00\x00\x00\x00\x00\x00\x00") + expect_encoding(t, &cbor.Tag{cbor.TAG_EPOCH_TIME_NR, u32(1363896240)}, "\xc1\x1a\x51\x4b\x67\xb0") + expect_encoding(t, &cbor.Tag{cbor.TAG_EPOCH_TIME_NR, f64(1363896240.500)}, "\xc1\xfb\x41\xd4\x52\xd9\xec\x20\x00\x00") +} + +// Helpers + +buf: bytes.Buffer +stream := bytes.buffer_to_stream(&buf) +encoder := cbor.Encoder{cbor.ENCODE_FULLY_DETERMINISTIC, stream} + +expect_decoding :: proc(t: ^testing.T, encoded: string, decoded: string, type: typeid, loc := #caller_location) { + bytes.buffer_reset(&buf) + bytes.buffer_write_string(&buf, encoded) + + res, err := cbor.decode(stream) + defer cbor.destroy(res) + + testing.expect_value(t, reflect.union_variant_typeid(res), type, loc) + testing.expect_value(t, err, nil, loc) + + str := cbor.diagnose(res, padding=-1) + defer delete(str) + + testing.expect_value(t, str, decoded, loc) +} + +expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_decoded: string, loc := #caller_location) { + bytes.buffer_reset(&buf) + bytes.buffer_write_string(&buf, encoded) + + res, err := cbor.decode(stream) + defer cbor.destroy(res) + + testing.expect_value(t, err, nil, loc) + + if tag, is_tag := res.(^cbor.Tag); is_tag { + testing.expect_value(t, tag.number, nr, loc) + + str := cbor.diagnose(tag, padding=-1) + defer delete(str) + + testing.expect_value(t, str, value_decoded, loc) + } else { + testing.errorf(t, "Value 
%#v is not a tag", res, loc) + } +} + +expect_float :: proc(t: ^testing.T, encoded: string, expected: $T, loc := #caller_location) where intrinsics.type_is_float(T) { + bytes.buffer_reset(&buf) + bytes.buffer_write_string(&buf, encoded) + + res, err := cbor.decode(stream) + defer cbor.destroy(res) + + testing.expect_value(t, reflect.union_variant_typeid(res), typeid_of(T), loc) + testing.expect_value(t, err, nil, loc) + + #partial switch r in res { + case f16: + when T == f16 { testing.expect_value(t, res, expected, loc) } else { unreachable() } + case f32: + when T == f32 { testing.expect_value(t, res, expected, loc) } else { unreachable() } + case f64: + when T == f64 { testing.expect_value(t, res, expected, loc) } else { unreachable() } + case: + unreachable() + } +} + +expect_encoding :: proc(t: ^testing.T, val: cbor.Value, encoded: string, loc := #caller_location) { + bytes.buffer_reset(&buf) + + err := cbor.encode(encoder, val) + testing.expect_value(t, err, nil, loc) + testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc) +} + +expect_streamed_encoding :: proc(t: ^testing.T, encoded: string, values: ..cbor.Value, loc := #caller_location) { + bytes.buffer_reset(&buf) + + for value, i in values { + err: cbor.Encode_Error + err2: cbor.Encode_Error + #partial switch v in value { + case ^cbor.Bytes: + if i == 0 { err = cbor.encode_stream_begin(stream, .Bytes) } + err2 = cbor._encode_bytes(encoder, v^) + case ^cbor.Text: + if i == 0 { err = cbor.encode_stream_begin(stream, .Text) } + err2 = cbor._encode_text(encoder, v^) + case ^cbor.Array: + if i == 0 { err = cbor.encode_stream_begin(stream, .Array) } + for item in v { + err2 = cbor.encode_stream_array_item(encoder, item) + if err2 != nil { break } + } + case ^cbor.Map: + err = cbor.encode_stream_begin(stream, .Map) + for item in v { + err2 = cbor.encode_stream_map_entry(encoder, item.key, item.value) + if err2 != nil { break } + } + case: + testing.errorf(t, 
"%v does not support streamed encoding", reflect.union_variant_typeid(value)) + } + + testing.expect_value(t, err, nil, loc) + testing.expect_value(t, err2, nil, loc) + } + + err := cbor.encode_stream_end(stream) + testing.expect_value(t, err, nil, loc) + + testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc) +} From b6c47e796390924faabd236204bc620ea35c1d13 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 16 Dec 2023 21:40:41 +0100 Subject: [PATCH 02/66] encoding/base64: add decode_into, add tests --- core/encoding/base64/base64.odin | 139 +++++++++++++++++-------- tests/core/Makefile | 3 + tests/core/build.bat | 2 + tests/core/encoding/base64/base64.odin | 60 +++++++++++ 4 files changed, 158 insertions(+), 46 deletions(-) create mode 100644 tests/core/encoding/base64/base64.odin diff --git a/core/encoding/base64/base64.odin b/core/encoding/base64/base64.odin index 793f22c57..535d457d5 100644 --- a/core/encoding/base64/base64.odin +++ b/core/encoding/base64/base64.odin @@ -44,21 +44,48 @@ DEC_TABLE := [128]int { } encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> (encoded: string, err: mem.Allocator_Error) #optional_allocator_error { - out_length := encoded_length(data) + out_length := encoded_len(data) if out_length == 0 { return } - out: strings.Builder - strings.builder_init(&out, 0, out_length, allocator) or_return - + out := strings.builder_make(0, out_length, allocator) or_return ioerr := encode_into(strings.to_stream(&out), data, ENC_TBL) - assert(ioerr == nil) + + assert(ioerr == nil, "string builder should not IO error") + assert(strings.builder_cap(out) == out_length, "buffer resized, `encoded_len` was wrong") return strings.to_string(out), nil } -encoded_length :: #force_inline proc(data: []byte) -> int { +encode_into :: proc(w: io.Writer, data: []byte, ENC_TBL := ENC_TABLE) -> io.Error { + length := len(data) + if length == 0 { + return nil + } + + c0, c1, 
c2, block: int + out: [4]byte + for i := 0; i < length; i += 3 { + #no_bounds_check { + c0, c1, c2 = int(data[i]), -1, -1 + + if i + 1 < length { c1 = int(data[i + 1]) } + if i + 2 < length { c2 = int(data[i + 2]) } + + block = (c0 << 16) | (max(c1, 0) << 8) | max(c2, 0) + + out[0] = ENC_TBL[block >> 18 & 63] + out[1] = ENC_TBL[block >> 12 & 63] + out[2] = c1 == -1 ? PADDING : ENC_TBL[block >> 6 & 63] + out[3] = c2 == -1 ? PADDING : ENC_TBL[block & 63] + } + io.write_full(w, out[:]) or_return + } + return nil +} + +encoded_len :: proc(data: []byte) -> int { length := len(data) if length == 0 { return 0 @@ -67,48 +94,30 @@ encoded_length :: #force_inline proc(data: []byte) -> int { return ((4 * length / 3) + 3) &~ 3 } -encode_into :: proc(w: io.Writer, data: []byte, ENC_TBL := ENC_TABLE) -> (err: io.Error) #no_bounds_check { - length := len(data) - if length == 0 { - return - } +decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> (decoded: []byte, err: mem.Allocator_Error) #optional_allocator_error { + out_length := decoded_len(data) - c0, c1, c2, block: int + out := strings.builder_make(0, out_length, allocator) or_return + ioerr := decode_into(strings.to_stream(&out), data, DEC_TBL) - for i, d := 0, 0; i < length; i, d = i + 3, d + 4 { - c0, c1, c2 = int(data[i]), -1, -1 + assert(ioerr == nil, "string builder should not IO error") + assert(strings.builder_cap(out) == out_length, "buffer resized, `decoded_len` was wrong") - if i + 1 < length { c1 = int(data[i + 1]) } - if i + 2 < length { c2 = int(data[i + 2]) } - - block = (c0 << 16) | (max(c1, 0) << 8) | max(c2, 0) - - out: [4]byte - out[0] = ENC_TBL[block >> 18 & 63] - out[1] = ENC_TBL[block >> 12 & 63] - out[2] = c1 == -1 ? PADDING : ENC_TBL[block >> 6 & 63] - out[3] = c2 == -1 ? 
PADDING : ENC_TBL[block & 63] - - #bounds_check { io.write_full(w, out[:]) or_return } - } - return + return out.buf[:], nil } -decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> (out: []byte, err: mem.Allocator_Error) #optional_allocator_error { - #no_bounds_check { - length := len(data) - if length == 0 { - return - } +decode_into :: proc(w: io.Writer, data: string, DEC_TBL := DEC_TABLE) -> io.Error { + length := decoded_len(data) + if length == 0 { + return nil + } - pad_count := data[length - 1] == PADDING ? (data[length - 2] == PADDING ? 2 : 1) : 0 - out_length := ((length * 6) >> 3) - pad_count - out = make([]byte, out_length, allocator) or_return - - c0, c1, c2, c3: int - b0, b1, b2: int - - for i, j := 0, 0; i < length; i, j = i + 4, j + 3 { + c0, c1, c2, c3: int + b0, b1, b2: int + buf: [3]byte + i, j: int + for ; j + 3 <= length; i, j = i + 4, j + 3 { + #no_bounds_check { c0 = DEC_TBL[data[i]] c1 = DEC_TBL[data[i + 1]] c2 = DEC_TBL[data[i + 2]] @@ -118,10 +127,48 @@ decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocato b1 = (c1 << 4) | (c2 >> 2) b2 = (c2 << 6) | c3 - out[j] = byte(b0) - out[j + 1] = byte(b1) - out[j + 2] = byte(b2) + buf[0] = byte(b0) + buf[1] = byte(b1) + buf[2] = byte(b2) } - return + + io.write_full(w, buf[:]) or_return } + + rest := length - j + if rest > 0 { + #no_bounds_check { + c0 = DEC_TBL[data[i]] + c1 = DEC_TBL[data[i + 1]] + c2 = DEC_TBL[data[i + 2]] + + b0 = (c0 << 2) | (c1 >> 4) + b1 = (c1 << 4) | (c2 >> 2) + } + + switch rest { + case 1: io.write_byte(w, byte(b0)) or_return + case 2: io.write_full(w, {byte(b0), byte(b1)}) or_return + } + } + + return nil +} + +decoded_len :: proc(data: string) -> int { + length := len(data) + if length == 0 { + return 0 + } + + padding: int + if data[length - 1] == PADDING { + if length > 1 && data[length - 2] == PADDING { + padding = 2 + } else { + padding = 1 + } + } + + return ((length * 6) >> 3) - padding } diff --git 
a/tests/core/Makefile b/tests/core/Makefile index 1fca7bf97..3fa38cd34 100644 --- a/tests/core/Makefile +++ b/tests/core/Makefile @@ -51,11 +51,14 @@ noise_test: $(ODIN) run math/noise $(COMMON) -out:test_noise encoding_test: +<<<<<<< HEAD $(ODIN) run encoding/hxa $(COMMON) $(COLLECTION) -out:test_hxa $(ODIN) run encoding/json $(COMMON) -out:test_json $(ODIN) run encoding/varint $(COMMON) -out:test_varint $(ODIN) run encoding/xml $(COMMON) -out:test_xml $(ODIN) run encoding/cbor $(COMMON) -out:test_cbor + $(ODIN) run encoding/hex $(COMMON) -out:test_hex + $(ODIN) run encoding/base64 $(COMMON) -out:test_base64 math_test: $(ODIN) run math $(COMMON) $(COLLECTION) -out:test_core_math diff --git a/tests/core/build.bat b/tests/core/build.bat index 5bf8e1ead..b9fc4e828 100644 --- a/tests/core/build.bat +++ b/tests/core/build.bat @@ -41,6 +41,8 @@ rem %PATH_TO_ODIN% run encoding/hxa %COMMON% %COLLECTION% -out:test_hxa.exe | %PATH_TO_ODIN% run encoding/varint %COMMON% -out:test_varint.exe || exit /b %PATH_TO_ODIN% run encoding/xml %COMMON% -out:test_xml.exe || exit /b %PATH_TO_ODIN% test encoding/cbor %COMMON% -out:test_cbor.exe || exit /b +%PATH_TO_ODIN% run encoding/hex %COMMON% -out:test_hex.exe || exit /b +%PATH_TO_ODIN% run encoding/base64 %COMMON% -out:test_base64.exe || exit /b echo --- echo Running core:math/noise tests diff --git a/tests/core/encoding/base64/base64.odin b/tests/core/encoding/base64/base64.odin new file mode 100644 index 000000000..41dbba683 --- /dev/null +++ b/tests/core/encoding/base64/base64.odin @@ -0,0 +1,60 @@ +package test_encoding_base64 + +import "core:encoding/base64" +import "core:fmt" +import "core:intrinsics" +import "core:os" +import "core:reflect" +import "core:testing" + +TEST_count := 0 +TEST_fail := 0 + +when ODIN_TEST { + expect_value :: testing.expect_value + +} else { + expect_value :: proc(t: ^testing.T, value, expected: $T, loc := #caller_location) -> bool where intrinsics.type_is_comparable(T) { + TEST_count += 1 + ok := 
value == expected || reflect.is_nil(value) && reflect.is_nil(expected) + if !ok { + TEST_fail += 1 + fmt.printf("[%v] expected %v, got %v\n", loc, expected, value) + } + return ok + } +} + +main :: proc() { + t := testing.T{} + + test_encoding(&t) + test_decoding(&t) + + fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + if TEST_fail > 0 { + os.exit(1) + } +} + +@(test) +test_encoding :: proc(t: ^testing.T) { + expect_value(t, base64.encode(transmute([]byte)string("")), "") + expect_value(t, base64.encode(transmute([]byte)string("f")), "Zg==") + expect_value(t, base64.encode(transmute([]byte)string("fo")), "Zm8=") + expect_value(t, base64.encode(transmute([]byte)string("foo")), "Zm9v") + expect_value(t, base64.encode(transmute([]byte)string("foob")), "Zm9vYg==") + expect_value(t, base64.encode(transmute([]byte)string("fooba")), "Zm9vYmE=") + expect_value(t, base64.encode(transmute([]byte)string("foobar")), "Zm9vYmFy") +} + +@(test) +test_decoding :: proc(t: ^testing.T) { + expect_value(t, string(base64.decode("")), "") + expect_value(t, string(base64.decode("Zg==")), "f") + expect_value(t, string(base64.decode("Zm8=")), "fo") + expect_value(t, string(base64.decode("Zm9v")), "foo") + expect_value(t, string(base64.decode("Zm9vYg==")), "foob") + expect_value(t, string(base64.decode("Zm9vYmE=")), "fooba") + expect_value(t, string(base64.decode("Zm9vYmFy")), "foobar") +} From 363769d4d3de601a64e7e4bd1e6b0e744c75671c Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 16 Dec 2023 21:42:06 +0100 Subject: [PATCH 03/66] encoding/cbor: cleanup base64 tag --- core/encoding/cbor/tags.odin | 112 +++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 50 deletions(-) diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 54bc7dd15..ef3ef45f2 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -213,20 +213,20 @@ tag_big_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> 
Marshal_E // is uninitialized (which we checked). is_neg, err := big.is_negative(&vv, mem.panic_allocator()) - assert(err == nil, "only errors if not initialized, which has been checked") + assert(err == nil, "should only error if not initialized, which has been checked") tnr: u8 = TAG_NEGATIVE_BIG_NR if is_neg else TAG_UNSIGNED_BIG_NR _encode_u8(e.writer, tnr, .Tag) or_return size_in_bytes, berr := big.int_to_bytes_size(&vv, false, mem.panic_allocator()) - assert(berr == nil, "only errors if not initialized, which has been checked") + assert(berr == nil, "should only error if not initialized, which has been checked") assert(size_in_bytes >= 0) err_conv(_encode_u64(e, u64(size_in_bytes), .Bytes)) or_return for offset := (size_in_bytes*8)-8; offset >= 0; offset -= 8 { bits, derr := big.int_bitfield_extract(&vv, offset, 8, mem.panic_allocator()) - assert(derr == nil, "only errors if not initialized or invalid argument (offset and count), which won't happen") + assert(derr == nil, "should only error if not initialized or invalid argument (offset and count), which won't happen") io.write_full(e.writer, {u8(bits & 255)}) or_return } @@ -273,63 +273,75 @@ tag_cbor_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_ } } -// NOTE: this could probably be more efficient by decoding bytes from CBOR and then from base64 at the same time. 
@(private) tag_base64_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { - hdr := _decode_header(r) or_return + hdr := _decode_header(r) or_return major, add := _header_split(hdr) - #partial switch major { - case .Text: - ti := reflect.type_info_base(type_info_of(v.id)) - _unmarshal_bytes(r, v, ti, hdr, add) or_return - #partial switch t in ti.variant { - case runtime.Type_Info_String: - switch t.is_cstring { - case true: - str := string((^cstring)(v.data)^) - decoded := base64.decode(str) or_return - (^cstring)(v.data)^ = strings.clone_to_cstring(string(decoded)) or_return - delete(decoded) - delete(str) - case false: - str := (^string)(v.data)^ - decoded := base64.decode(str) or_return - (^string)(v.data)^ = string(decoded) - delete(str) - } - return + ti := reflect.type_info_base(type_info_of(v.id)) - case runtime.Type_Info_Array: - raw := ([^]byte)(v.data) - decoded := base64.decode(string(raw[:t.count])) or_return - copy(raw[:t.count], decoded) - delete(decoded) - return + if major != .Text && major != .Bytes { + return .Bad_Tag_Value + } - case runtime.Type_Info_Slice: - raw := (^[]byte)(v.data) - decoded := base64.decode(string(raw^)) or_return - delete(raw^) - raw^ = decoded - return + bytes: string; { + context.allocator = context.temp_allocator + bytes = string(err_conv(_decode_bytes(r, add)) or_return) + } + defer delete(bytes, context.temp_allocator) - case runtime.Type_Info_Dynamic_Array: - raw := (^mem.Raw_Dynamic_Array)(v.data) - str := string(((^[dynamic]byte)(v.data)^)[:]) + #partial switch t in ti.variant { + case reflect.Type_Info_String: - decoded := base64.decode(str) or_return - delete(str) + if t.is_cstring { + length := base64.decoded_len(bytes) + builder := strings.builder_make(0, length+1) + base64.decode_into(strings.to_stream(&builder), bytes) or_return - raw.data = raw_data(decoded) - raw.len = len(decoded) - raw.cap = len(decoded) - return - - case: unreachable() + raw := 
(^cstring)(v.data) + raw^ = cstring(raw_data(builder.buf)) + } else { + raw := (^string)(v.data) + raw^ = string(base64.decode(bytes) or_return) } - case: return .Bad_Tag_Value + return + + case reflect.Type_Info_Slice: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + raw := (^[]byte)(v.data) + raw^ = base64.decode(bytes) or_return + return + + case reflect.Type_Info_Dynamic_Array: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + decoded := base64.decode(bytes) or_return + + raw := (^mem.Raw_Dynamic_Array)(v.data) + raw.data = raw_data(decoded) + raw.len = len(decoded) + raw.cap = len(decoded) + raw.allocator = context.allocator + return + + case reflect.Type_Info_Array: + elem_base := reflect.type_info_base(t.elem) + + if elem_base.id != byte { return _unsupported(v, hdr) } + + if base64.decoded_len(bytes) > t.count { return _unsupported(v, hdr) } + + slice := ([^]byte)(v.data)[:len(bytes)] + copy(slice, base64.decode(bytes) or_return) + return } + + return _unsupported(v, hdr) } @(private) @@ -355,7 +367,7 @@ tag_base64_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marsha } } - out_len := base64.encoded_length(bytes) + out_len := base64.encoded_len(bytes) err_conv(_encode_u64(e, u64(out_len), .Text)) or_return return base64.encode_into(e.writer, bytes) } From d77ae9ababb539e7b48258c94c3b55fc46e62919 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 16 Dec 2023 21:42:33 +0100 Subject: [PATCH 04/66] encoding/cbor: fully support marshal/unmarshal of unions --- core/encoding/cbor/marshal.odin | 26 +- core/encoding/cbor/tags.odin | 9 + core/encoding/cbor/unmarshal.odin | 76 +++++- tests/core/encoding/cbor/test_core_cbor.odin | 260 ++++++++++++++++--- 4 files changed, 325 insertions(+), 46 deletions(-) diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index aab2defb2..a5d5efb3e 100644 --- 
a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -506,8 +506,32 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { if v.data == nil || tag <= 0 { return _encode_nil(e.writer) } + id := info.variants[tag-1].id - return marshal_into(e, any{v.data, id}) + if len(info.variants) == 1 { + id := info.variants[tag-1].id + return marshal_into(e, any{v.data, id}) + } + + // Encode a non-nil multi-variant union as the `TAG_OBJECT_TYPE`, + // which is a tag of an array where the first element is the textual id/type of the object + // that follows it. + + err_conv(_encode_u16(e, TAG_OBJECT_TYPE, .Tag)) or_return + _encode_u8(e.writer, 2, .Array) or_return + + vti := reflect.union_variant_type_info(v) + #partial switch vt in vti.variant { + case reflect.Type_Info_Named: + err_conv(_encode_text(e, vt.name)) or_return + case: + builder := strings.builder_make(context.temp_allocator) or_return + defer strings.builder_destroy(&builder) + reflect.write_type(&builder, vti) + err_conv(_encode_text(e, strings.to_string(builder))) or_return + } + + return marshal_into(e, any{v.data, vti.id}) case runtime.Type_Info_Enum: return marshal_into(e, any{v.data, info.base.id}) diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index ef3ef45f2..509896d22 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -38,6 +38,15 @@ TAG_BASE64_ID :: "base64" // given content is definitely CBOR. TAG_SELF_DESCRIBED_CBOR :: 55799 +// A tag that is used to assign a textual type to the object following it. +// The tag's value must be an array of 2 items, where the first is text (describing the following type) +// and the second is any valid CBOR value. +// +// See the registration: https://datatracker.ietf.org/doc/draft-rundgren-cotx/05/ +// +// We use this in Odin to marshal and unmarshal unions. 
+TAG_OBJECT_TYPE :: 1010 + // A tag implementation that handles marshals and unmarshals for the tag it is registered on. Tag_Implementation :: struct { data: rawptr, diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 0da8e3f2a..c3ab6f908 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -8,9 +8,6 @@ import "core:runtime" import "core:strings" import "core:unicode/utf8" -// `strings` is only used in poly procs, but -vet thinks it is fully unused. -_ :: strings - /* Unmarshals the given CBOR into the given pointer using reflection. Types that require allocation are allocated using the given allocator. @@ -79,7 +76,7 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E dst = err_conv(decode(r, hdr)) or_return return } - + switch hdr { case .U8: decoded := _decode_u8(r) or_return @@ -275,10 +272,12 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E } nr := err_conv(_decode_tag_nr(r, add)) or_return - + // Custom tag implementations. if impl, ok := _tag_implementations_nr[nr]; ok { return impl->unmarshal(r, nr, v) + } else if nr == TAG_OBJECT_TYPE { + return _unmarshal_union(r, v, ti, hdr) } else { // Discard the tag info and unmarshal as its value. return _unmarshal_value(r, v, _decode_header(r) or_return) @@ -717,6 +716,73 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header } } +// Unmarshal into a union, based on the `TAG_OBJECT_TYPE` tag of the spec, it denotes a tag which +// contains an array of exactly two elements, the first is a textual representation of the following +// CBOR value's type. 
+_unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header) -> (err: Unmarshal_Error) { + #partial switch t in ti.variant { + case reflect.Type_Info_Union: + idhdr: Header + target_name: string + { + vhdr := _decode_header(r) or_return + vmaj, vadd := _header_split(vhdr) + if vmaj != .Array { + return .Bad_Tag_Value + } + + n_items, unknown := err_conv(_decode_container_length(r, vadd)) or_return + if unknown || n_items != 2 { + return .Bad_Tag_Value + } + + idhdr = _decode_header(r) or_return + idmaj, idadd := _header_split(idhdr) + if idmaj != .Text { + return .Bad_Tag_Value + } + + context.allocator = context.temp_allocator + target_name = err_conv(_decode_text(r, idadd)) or_return + } + defer delete(target_name, context.temp_allocator) + + for variant, i in t.variants { + tag := i64(i) + if !t.no_nil { + tag += 1 + } + + #partial switch vti in variant.variant { + case reflect.Type_Info_Named: + if vti.name == target_name { + reflect.set_union_variant_raw_tag(v, tag) + return _unmarshal_value(r, any{v.data, variant.id}, _decode_header(r) or_return) + } + + case: + builder := strings.builder_make(context.temp_allocator) + defer strings.builder_destroy(&builder) + + reflect.write_type(&builder, variant) + variant_name := strings.to_string(builder) + + if variant_name == target_name { + reflect.set_union_variant_raw_tag(v, tag) + return _unmarshal_value(r, any{v.data, variant.id}, _decode_header(r) or_return) + } + } + } + + // No variant matched. + return _unsupported(v, idhdr) + + case: + // Not a union. 
+ return _unsupported(v, hdr) + } +} + _assign_int :: proc(val: any, i: $T) -> bool { v := reflect.any_core(val) diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 22359d830..06b96c915 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -6,10 +6,96 @@ import "core:fmt" import "core:intrinsics" import "core:math/big" import "core:mem" +import "core:os" import "core:reflect" import "core:testing" import "core:time" +TEST_count := 0 +TEST_fail := 0 + +when ODIN_TEST { + expect :: testing.expect + expect_value :: testing.expect_value + errorf :: testing.errorf + log :: testing.log + +} else { + expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) { + TEST_count += 1 + if !condition { + TEST_fail += 1 + fmt.printf("[%v] %v\n", loc, message) + return + } + } + + expect_value :: proc(t: ^testing.T, value, expected: $T, loc := #caller_location) -> bool where intrinsics.type_is_comparable(T) { + TEST_count += 1 + ok := value == expected || reflect.is_nil(value) && reflect.is_nil(expected) + if !ok { + TEST_fail += 1 + fmt.printf("[%v] expected %v, got %v\n", loc, expected, value) + } + return ok + } + + errorf :: proc(t: ^testing.T, fmts: string, args: ..any, loc := #caller_location) { + TEST_fail += 1 + fmt.printf("[%v] ERROR: ", loc) + fmt.printf(fmts, ..args) + fmt.println() + } + + log :: proc(t: ^testing.T, v: any, loc := #caller_location) { + fmt.printf("[%v] ", loc) + fmt.printf("log: %v\n", v) + } +} + +main :: proc() { + t := testing.T{} + + test_marshalling(&t) + + test_marshalling_maybe(&t) + test_marshalling_nil_maybe(&t) + + test_cbor_marshalling_union(&t) + + test_decode_unsigned(&t) + test_encode_unsigned(&t) + + test_decode_negative(&t) + test_encode_negative(&t) + + test_decode_simples(&t) + test_encode_simples(&t) + + test_decode_floats(&t) + test_encode_floats(&t) + + test_decode_bytes(&t) + 
test_encode_bytes(&t) + + test_decode_strings(&t) + test_encode_strings(&t) + + test_decode_lists(&t) + test_encode_lists(&t) + + test_decode_maps(&t) + test_encode_maps(&t) + + test_decode_tags(&t) + test_encode_tags(&t) + + fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + if TEST_fail > 0 { + os.exit(1) + } +} + Foo :: struct { str: string, cstr: cstring, @@ -58,7 +144,7 @@ test_marshalling :: proc(t: ^testing.T) { context.temp_allocator = context.allocator defer mem.tracking_allocator_destroy(&tracker) - ev :: testing.expect_value + ev :: expect_value { nice := "16 is a nice number" @@ -228,7 +314,7 @@ test_marshalling :: proc(t: ^testing.T) { } } - case: testing.error(t, v) + case: errorf(t, "wrong type %v", v) } ev(t, backf.neg, f.neg) @@ -258,22 +344,116 @@ test_marshalling :: proc(t: ^testing.T) { s_equals, s_err := big.equals(&backf.smallest, &f.smallest) ev(t, s_err, nil) if !s_equals { - testing.errorf(t, "smallest: %v does not equal %v", big.itoa(&backf.smallest), big.itoa(&f.smallest)) + errorf(t, "smallest: %v does not equal %v", big.itoa(&backf.smallest), big.itoa(&f.smallest)) } b_equals, b_err := big.equals(&backf.biggest, &f.biggest) ev(t, b_err, nil) if !b_equals { - testing.errorf(t, "biggest: %v does not equal %v", big.itoa(&backf.biggest), big.itoa(&f.biggest)) + errorf(t, "biggest: %v does not equal %v", big.itoa(&backf.biggest), big.itoa(&f.biggest)) } } for _, leak in tracker.allocation_map { - testing.errorf(t, "%v leaked %m\n", leak.location, leak.size) + errorf(t, "%v leaked %m\n", leak.location, leak.size) } for bad_free in tracker.bad_free_array { - testing.errorf(t, "%v allocation %p was freed badly\n", bad_free.location, bad_free.memory) + errorf(t, "%v allocation %p was freed badly\n", bad_free.location, bad_free.memory) + } +} + +@(test) +test_marshalling_maybe :: proc(t: ^testing.T) { + maybe_test: Maybe(int) = 1 + data, err := cbor.marshal(maybe_test) + expect_value(t, err, nil) + + val, derr := 
cbor.decode(string(data)) + expect_value(t, derr, nil) + + expect_value(t, cbor.diagnose(val), "1") + + maybe_dest: Maybe(int) + uerr := cbor.unmarshal(string(data), &maybe_dest) + expect_value(t, uerr, nil) + expect_value(t, maybe_dest, 1) +} + +@(test) +test_marshalling_nil_maybe :: proc(t: ^testing.T) { + maybe_test: Maybe(int) + data, err := cbor.marshal(maybe_test) + expect_value(t, err, nil) + + val, derr := cbor.decode(string(data)) + expect_value(t, derr, nil) + + expect_value(t, cbor.diagnose(val), "nil") + + maybe_dest: Maybe(int) + uerr := cbor.unmarshal(string(data), &maybe_dest) + expect_value(t, uerr, nil) + expect_value(t, maybe_dest, nil) +} + +@(test) +test_cbor_marshalling_union :: proc(t: ^testing.T) { + My_Distinct :: distinct string + + My_Enum :: enum { + One, + Two, + } + + My_Struct :: struct { + my_enum: My_Enum, + } + + My_Union :: union { + string, + My_Distinct, + My_Struct, + int, + } + + { + test: My_Union = My_Distinct("Hello, World!") + data, err := cbor.marshal(test) + expect_value(t, err, nil) + + val, derr := cbor.decode(string(data)) + expect_value(t, derr, nil) + + expect_value(t, cbor.diagnose(val, -1), `1010(["My_Distinct", "Hello, World!"])`) + + dest: My_Union + uerr := cbor.unmarshal(string(data), &dest) + expect_value(t, uerr, nil) + expect_value(t, dest, My_Distinct("Hello, World!")) + } + + My_Union_No_Nil :: union #no_nil { + string, + My_Distinct, + My_Struct, + int, + } + + { + test: My_Union_No_Nil = My_Struct{.Two} + data, err := cbor.marshal(test) + expect_value(t, err, nil) + + val, derr := cbor.decode(string(data)) + expect_value(t, derr, nil) + + expect_value(t, cbor.diagnose(val, -1), `1010(["My_Struct", {"my_enum": 1}])`) + + dest: My_Union_No_Nil + uerr := cbor.unmarshal(string(data), &dest) + expect_value(t, uerr, nil) + expect_value(t, dest, My_Struct{.Two}) } } @@ -500,34 +680,34 @@ test_encode_lists :: proc(t: ^testing.T) { err: cbor.Encode_Error err = cbor.encode_stream_begin(stream, .Array) - 
testing.expect_value(t, err, nil) + expect_value(t, err, nil) { err = cbor.encode_stream_array_item(encoder, u8(1)) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) err = cbor.encode_stream_array_item(encoder, &cbor.Array{u8(2), u8(3)}) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) err = cbor.encode_stream_begin(stream, .Array) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) { err = cbor.encode_stream_array_item(encoder, u8(4)) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) err = cbor.encode_stream_array_item(encoder, u8(5)) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) } err = cbor.encode_stream_end(stream) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) } err = cbor.encode_stream_end(stream) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) - testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x9f\x01\x82\x02\x03\x9f\x04\x05\xff\xff"))) + expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x9f\x01\x82\x02\x03\x9f\x04\x05\xff\xff"))) } { @@ -535,26 +715,26 @@ test_encode_lists :: proc(t: ^testing.T) { err: cbor.Encode_Error err = cbor._encode_u8(stream, 2, .Array) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) a := "a" err = cbor.encode(encoder, &a) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) { err = cbor.encode_stream_begin(stream, .Map) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) b := "b" c := "c" err = cbor.encode_stream_map_entry(encoder, &b, &c) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) err = cbor.encode_stream_end(stream) - testing.expect_value(t, err, nil) + expect_value(t, err, nil) } - testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x82\x61\x61\xbf\x61\x62\x61\x63\xff"))) + expect_value(t, 
fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)string("\x82\x61\x61\xbf\x61\x62\x61\x63\xff"))) } } @@ -619,13 +799,13 @@ expect_decoding :: proc(t: ^testing.T, encoded: string, decoded: string, type: t res, err := cbor.decode(stream) defer cbor.destroy(res) - testing.expect_value(t, reflect.union_variant_typeid(res), type, loc) - testing.expect_value(t, err, nil, loc) + expect_value(t, reflect.union_variant_typeid(res), type, loc) + expect_value(t, err, nil, loc) str := cbor.diagnose(res, padding=-1) defer delete(str) - testing.expect_value(t, str, decoded, loc) + expect_value(t, str, decoded, loc) } expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_decoded: string, loc := #caller_location) { @@ -635,17 +815,17 @@ expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_de res, err := cbor.decode(stream) defer cbor.destroy(res) - testing.expect_value(t, err, nil, loc) + expect_value(t, err, nil, loc) if tag, is_tag := res.(^cbor.Tag); is_tag { - testing.expect_value(t, tag.number, nr, loc) + expect_value(t, tag.number, nr, loc) str := cbor.diagnose(tag, padding=-1) defer delete(str) - testing.expect_value(t, str, value_decoded, loc) + expect_value(t, str, value_decoded, loc) } else { - testing.errorf(t, "Value %#v is not a tag", res, loc) + errorf(t, "Value %#v is not a tag", res, loc) } } @@ -656,16 +836,16 @@ expect_float :: proc(t: ^testing.T, encoded: string, expected: $T, loc := #calle res, err := cbor.decode(stream) defer cbor.destroy(res) - testing.expect_value(t, reflect.union_variant_typeid(res), typeid_of(T), loc) - testing.expect_value(t, err, nil, loc) + expect_value(t, reflect.union_variant_typeid(res), typeid_of(T), loc) + expect_value(t, err, nil, loc) #partial switch r in res { case f16: - when T == f16 { testing.expect_value(t, res, expected, loc) } else { unreachable() } + when T == f16 { expect_value(t, res, expected, loc) } else { unreachable() } case f32: - when T == f32 { 
testing.expect_value(t, res, expected, loc) } else { unreachable() } + when T == f32 { expect_value(t, res, expected, loc) } else { unreachable() } case f64: - when T == f64 { testing.expect_value(t, res, expected, loc) } else { unreachable() } + when T == f64 { expect_value(t, res, expected, loc) } else { unreachable() } case: unreachable() } @@ -675,8 +855,8 @@ expect_encoding :: proc(t: ^testing.T, val: cbor.Value, encoded: string, loc := bytes.buffer_reset(&buf) err := cbor.encode(encoder, val) - testing.expect_value(t, err, nil, loc) - testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc) + expect_value(t, err, nil, loc) + expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc) } expect_streamed_encoding :: proc(t: ^testing.T, encoded: string, values: ..cbor.Value, loc := #caller_location) { @@ -705,15 +885,15 @@ expect_streamed_encoding :: proc(t: ^testing.T, encoded: string, values: ..cbor. 
if err2 != nil { break } } case: - testing.errorf(t, "%v does not support streamed encoding", reflect.union_variant_typeid(value)) + errorf(t, "%v does not support streamed encoding", reflect.union_variant_typeid(value)) } - testing.expect_value(t, err, nil, loc) - testing.expect_value(t, err2, nil, loc) + expect_value(t, err, nil, loc) + expect_value(t, err2, nil, loc) } err := cbor.encode_stream_end(stream) - testing.expect_value(t, err, nil, loc) + expect_value(t, err, nil, loc) - testing.expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc) + expect_value(t, fmt.tprint(bytes.buffer_to_bytes(&buf)), fmt.tprint(transmute([]byte)encoded), loc) } From 21e6e28a3a5609bc4db19dd2b1bc00ff7b1ac5e5 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 16 Dec 2023 23:02:30 +0100 Subject: [PATCH 05/66] encoding/cbor: add decoder flags and protect from malicious untrusted input --- core/encoding/cbor/cbor.odin | 8 +- core/encoding/cbor/coding.odin | 279 ++++++++++++------- core/encoding/cbor/tags.odin | 32 +-- core/encoding/cbor/unmarshal.odin | 246 +++++++++------- tests/core/encoding/cbor/test_core_cbor.odin | 17 +- 5 files changed, 351 insertions(+), 231 deletions(-) diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index e91c53f3c..9c4bb0e4e 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -10,8 +10,13 @@ import "core:strings" // If we are decoding a stream of either a map or list, the initial capacity will be this value. INITIAL_STREAMED_CONTAINER_CAPACITY :: 8 + // If we are decoding a stream of either text or bytes, the initial capacity will be this value. -INITIAL_STREAMED_BYTES_CAPACITY :: 16 +INITIAL_STREAMED_BYTES_CAPACITY :: 16 + +// The default maximum amount of bytes to allocate on a buffer/container at once to prevent +// malicious input from causing massive allocations. 
+DEFAULT_MAX_PRE_ALLOC :: mem.Kilobyte // Known/common headers are defined, undefined headers can still be valid. // Higher 3 bits is for the major type and lower 5 bits for the additional information. @@ -157,6 +162,7 @@ Decode_Data_Error :: enum { Nested_Indefinite_Length, // When an streamed/indefinite length container nests another, this is not allowed. Nested_Tag, // When a tag's value is another tag, this is not allowed. Length_Too_Big, // When the length of a container (map, array, bytes, string) is more than `max(int)`. + Disallowed_Streaming, // When the `.Disallow_Streaming` flag is set and a streaming header is encountered. Break, } diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 5c14d8f87..e39519e01 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -33,16 +33,40 @@ Encoder_Flags :: bit_set[Encoder_Flag] // Flags for fully deterministic output (if you are not using streaming/indeterminate length). ENCODE_FULLY_DETERMINISTIC :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size, .Deterministic_Map_Sorting} + // Flags for the smallest encoding output. -ENCODE_SMALL :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size} -// Flags for the fastest encoding output. -ENCODE_FAST :: Encoder_Flags{} +ENCODE_SMALL :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size} Encoder :: struct { flags: Encoder_Flags, writer: io.Writer, } +Decoder_Flag :: enum { + // Rejects (with an error `.Disallowed_Streaming`) when a streaming CBOR header is encountered. + Disallow_Streaming, + + // Pre-allocates buffers and containers with the size that was set in the CBOR header. + // This should only be enabled when you control both ends of the encoding, if you don't, + // attackers can craft input that causes massive (`max(u64)`) byte allocations for a few bytes of + // CBOR. 
+ Trusted_Input, + + // Makes the decoder shrink excess capacity from allocated buffers/containers before returning. + Shrink_Excess, +} + +Decoder_Flags :: bit_set[Decoder_Flag] + +Decoder :: struct { + // The max amount of bytes allowed to pre-allocate when `.Trusted_Input` is not set on the + // flags. + max_pre_alloc: int, + + flags: Decoder_Flags, + reader: io.Reader, +} + /* Decodes both deterministic and non-deterministic CBOR into a `Value` variant. @@ -52,28 +76,60 @@ Allocations are done using the given allocator, *no* allocations are done on the `context.temp_allocator`. A value can be (fully and recursively) deallocated using the `destroy` proc in this package. + +Disable streaming/indeterminate lengths with the `.Disallow_Streaming` flag. + +Shrink excess bytes in buffers and containers with the `.Shrink_Excess` flag. + +Mark the input as trusted input with the `.Trusted_Input` flag; this turns off the safety feature +of not pre-allocating more than `max_pre_alloc` bytes before reading into the bytes. You should only +do this when you own both sides of the encoding and are sure there can't be malicious bytes used as +an input. */ -decode :: proc { - decode_string, - decode_reader, +decode_from :: proc { + decode_from_string, + decode_from_reader, + decode_from_decoder, } +decode :: decode_from // Decodes the given string as CBOR. // See docs on the proc group `decode` for more information. -decode_string :: proc(s: string, allocator := context.allocator) -> (v: Value, err: Decode_Error) { +decode_from_string :: proc(s: string, flags: Decoder_Flags = {}, allocator := context.allocator) -> (v: Value, err: Decode_Error) { context.allocator = allocator - r: strings.Reader strings.reader_init(&r, s) - return decode(strings.reader_to_stream(&r), allocator=allocator) + return decode_from_reader(strings.reader_to_stream(&r), flags) } // Reads a CBOR value from the given reader. // See docs on the proc group `decode` for more information. 
-decode_reader :: proc(r: io.Reader, hdr: Header = Header(0), allocator := context.allocator) -> (v: Value, err: Decode_Error) { +decode_from_reader :: proc(r: io.Reader, flags: Decoder_Flags = {}, allocator := context.allocator) -> (v: Value, err: Decode_Error) { + return decode_from_decoder( + Decoder{ DEFAULT_MAX_PRE_ALLOC, flags, r }, + allocator=allocator, + ) +} + +// Reads a CBOR value from the given decoder. +// See docs on the proc group `decode` for more information. +decode_from_decoder :: proc(d: Decoder, allocator := context.allocator) -> (v: Value, err: Decode_Error) { context.allocator = allocator + d := d + if d.max_pre_alloc <= 0 { + d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC + } + + v, err = _decode_from_decoder(d) + // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. + if err == .EOF { err = .Unexpected_EOF } + return +} + +_decode_from_decoder :: proc(d: Decoder, hdr: Header = Header(0)) -> (v: Value, err: Decode_Error) { hdr := hdr + r := d.reader if hdr == Header(0) { hdr = _decode_header(r) or_return } switch hdr { case .U8: return _decode_u8 (r) @@ -105,11 +161,11 @@ decode_reader :: proc(r: io.Reader, hdr: Header = Header(0), allocator := contex switch maj { case .Unsigned: return _decode_tiny_u8(add) case .Negative: return Negative_U8(_decode_tiny_u8(add) or_return), nil - case .Bytes: return _decode_bytes_ptr(r, add) - case .Text: return _decode_text_ptr(r, add) - case .Array: return _decode_array_ptr(r, add) - case .Map: return _decode_map_ptr(r, add) - case .Tag: return _decode_tag_ptr(r, add) + case .Bytes: return _decode_bytes_ptr(d, add) + case .Text: return _decode_text_ptr(d, add) + case .Array: return _decode_array_ptr(d, add) + case .Map: return _decode_map_ptr(d, add) + case .Tag: return _decode_tag_ptr(d, add) case .Other: return _decode_tiny_simple(add) case: return nil, .Bad_Major } @@ -246,7 +302,7 @@ _encode_u8 :: proc(w: io.Writer, v: u8, major: Major = .Unsigned) -> (err: io.Er } 
_decode_tiny_u8 :: proc(additional: Add) -> (u8, Decode_Data_Error) { - if intrinsics.expect(additional < .One_Byte, true) { + if additional < .One_Byte { return u8(additional), nil } @@ -316,64 +372,53 @@ _encode_u64_exact :: proc(w: io.Writer, v: u64, major: Major = .Unsigned) -> (er return } -_decode_bytes_ptr :: proc(r: io.Reader, add: Add, type: Major = .Bytes) -> (v: ^Bytes, err: Decode_Error) { +_decode_bytes_ptr :: proc(d: Decoder, add: Add, type: Major = .Bytes) -> (v: ^Bytes, err: Decode_Error) { v = new(Bytes) or_return defer if err != nil { free(v) } - v^ = _decode_bytes(r, add, type) or_return + v^ = _decode_bytes(d, add, type) or_return return } -_decode_bytes :: proc(r: io.Reader, add: Add, type: Major = .Bytes) -> (v: Bytes, err: Decode_Error) { - _n_items, length_is_unknown := _decode_container_length(r, add) or_return +_decode_bytes :: proc(d: Decoder, add: Add, type: Major = .Bytes) -> (v: Bytes, err: Decode_Error) { + n, scap := _decode_len_str(d, add) or_return + + buf := strings.builder_make(0, scap) or_return + defer if err != nil { strings.builder_destroy(&buf) } + buf_stream := strings.to_stream(&buf) - n_items := _n_items.? or_else INITIAL_STREAMED_BYTES_CAPACITY - - if length_is_unknown { - buf: strings.Builder - buf.buf = make([dynamic]byte, 0, n_items) or_return - defer if err != nil { strings.builder_destroy(&buf) } - - buf_stream := strings.to_stream(&buf) - - for { - header := _decode_header(r) or_return + if n == -1 { + indefinite_loop: for { + header := _decode_header(d.reader) or_return maj, add := _header_split(header) - #partial switch maj { case type: - _n_items, length_is_unknown := _decode_container_length(r, add) or_return - if length_is_unknown { + iter_n, iter_cap := _decode_len_str(d, add) or_return + if iter_n == -1 { return nil, .Nested_Indefinite_Length } - n_items := i64(_n_items.?) 
+ reserve(&buf.buf, len(buf.buf) + iter_cap) or_return + io.copy_n(buf_stream, d.reader, i64(iter_n)) or_return - copied := io.copy_n(buf_stream, r, n_items) or_return - assert(copied == n_items) - case .Other: if add != .Break { return nil, .Bad_Argument } - - v = buf.buf[:] - - // Write zero byte so this can be converted to cstring. - io.write_full(buf_stream, {0}) or_return - shrink(&buf.buf) // Ignoring error, this is not critical to succeed. - return + break indefinite_loop case: return nil, .Bad_Major } } } else { - v = make([]byte, n_items + 1) or_return // Space for the bytes and a zero byte. - defer if err != nil { delete(v) } - - io.read_full(r, v[:n_items]) or_return - - v = v[:n_items] // Take off zero byte. - return + io.copy_n(buf_stream, d.reader, i64(n)) or_return } + + v = buf.buf[:] + + // Write zero byte so this can be converted to cstring. + strings.write_byte(&buf, 0) + + if .Shrink_Excess in d.flags { shrink(&buf.buf) } + return } _encode_bytes :: proc(e: Encoder, val: Bytes, major: Major = .Bytes) -> (err: Encode_Error) { @@ -383,43 +428,41 @@ _encode_bytes :: proc(e: Encoder, val: Bytes, major: Major = .Bytes) -> (err: En return } -_decode_text_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Text, err: Decode_Error) { +_decode_text_ptr :: proc(d: Decoder, add: Add) -> (v: ^Text, err: Decode_Error) { v = new(Text) or_return defer if err != nil { free(v) } - v^ = _decode_text(r, add) or_return + v^ = _decode_text(d, add) or_return return } -_decode_text :: proc(r: io.Reader, add: Add) -> (v: Text, err: Decode_Error) { - return (Text)(_decode_bytes(r, add, .Text) or_return), nil +_decode_text :: proc(d: Decoder, add: Add) -> (v: Text, err: Decode_Error) { + return (Text)(_decode_bytes(d, add, .Text) or_return), nil } _encode_text :: proc(e: Encoder, val: Text) -> Encode_Error { return _encode_bytes(e, transmute([]byte)val, .Text) } -_decode_array_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Array, err: Decode_Error) { +_decode_array_ptr :: proc(d: 
Decoder, add: Add) -> (v: ^Array, err: Decode_Error) { v = new(Array) or_return defer if err != nil { free(v) } - v^ = _decode_array(r, add) or_return + v^ = _decode_array(d, add) or_return return } -_decode_array :: proc(r: io.Reader, add: Add) -> (v: Array, err: Decode_Error) { - _n_items, length_is_unknown := _decode_container_length(r, add) or_return - n_items := _n_items.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY - - array := make([dynamic]Value, 0, n_items) or_return +_decode_array :: proc(d: Decoder, add: Add) -> (v: Array, err: Decode_Error) { + n, scap := _decode_len_container(d, add) or_return + array := make([dynamic]Value, 0, scap) or_return defer if err != nil { for entry in array { destroy(entry) } delete(array) } - for i := 0; length_is_unknown || i < n_items; i += 1 { - val, verr := decode(r) - if length_is_unknown && verr == .Break { + for i := 0; n == -1 || i < n; i += 1 { + val, verr := _decode_from_decoder(d) + if n == -1 && verr == .Break { break } else if verr != nil { err = verr @@ -428,8 +471,9 @@ _decode_array :: proc(r: io.Reader, add: Add) -> (v: Array, err: Decode_Error) { append(&array, val) or_return } + + if .Shrink_Excess in d.flags { shrink(&array) } - shrink(&array) v = array[:] return } @@ -443,19 +487,17 @@ _encode_array :: proc(e: Encoder, arr: Array) -> Encode_Error { return nil } -_decode_map_ptr :: proc(r: io.Reader, add: Add) -> (v: ^Map, err: Decode_Error) { +_decode_map_ptr :: proc(d: Decoder, add: Add) -> (v: ^Map, err: Decode_Error) { v = new(Map) or_return defer if err != nil { free(v) } - v^ = _decode_map(r, add) or_return + v^ = _decode_map(d, add) or_return return } -_decode_map :: proc(r: io.Reader, add: Add) -> (v: Map, err: Decode_Error) { - _n_items, length_is_unknown := _decode_container_length(r, add) or_return - n_items := _n_items.? 
or_else INITIAL_STREAMED_CONTAINER_CAPACITY - - items := make([dynamic]Map_Entry, 0, n_items) or_return +_decode_map :: proc(d: Decoder, add: Add) -> (v: Map, err: Decode_Error) { + n, scap := _decode_len_container(d, add) or_return + items := make([dynamic]Map_Entry, 0, scap) or_return defer if err != nil { for entry in items { destroy(entry.key) @@ -464,23 +506,24 @@ _decode_map :: proc(r: io.Reader, add: Add) -> (v: Map, err: Decode_Error) { delete(items) } - for i := 0; length_is_unknown || i < n_items; i += 1 { - key, kerr := decode(r) - if length_is_unknown && kerr == .Break { + for i := 0; n == -1 || i < n; i += 1 { + key, kerr := _decode_from_decoder(d) + if n == -1 && kerr == .Break { break } else if kerr != nil { return nil, kerr } - value := decode(r) or_return + value := decode_from_decoder(d) or_return append(&items, Map_Entry{ key = key, value = value, }) or_return } + + if .Shrink_Excess in d.flags { shrink(&items) } - shrink(&items) v = items[:] return } @@ -537,8 +580,8 @@ _encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) { return nil } -_decode_tag_ptr :: proc(r: io.Reader, add: Add) -> (v: Value, err: Decode_Error) { - tag := _decode_tag(r, add) or_return +_decode_tag_ptr :: proc(d: Decoder, add: Add) -> (v: Value, err: Decode_Error) { + tag := _decode_tag(d, add) or_return if t, ok := tag.?; ok { defer if err != nil { destroy(t.value) } tp := new(Tag) or_return @@ -547,11 +590,11 @@ _decode_tag_ptr :: proc(r: io.Reader, add: Add) -> (v: Value, err: Decode_Error) } // no error, no tag, this was the self described CBOR tag, skip it. 
- return decode(r) + return _decode_from_decoder(d) } -_decode_tag :: proc(r: io.Reader, add: Add) -> (v: Maybe(Tag), err: Decode_Error) { - num := _decode_tag_nr(r, add) or_return +_decode_tag :: proc(d: Decoder, add: Add) -> (v: Maybe(Tag), err: Decode_Error) { + num := _decode_uint_as_u64(d.reader, add) or_return // CBOR can be wrapped in a tag that decoders can use to see/check if the binary data is CBOR. // We can ignore it here. @@ -561,7 +604,7 @@ _decode_tag :: proc(r: io.Reader, add: Add) -> (v: Maybe(Tag), err: Decode_Error t := Tag{ number = num, - value = decode(r) or_return, + value = _decode_from_decoder(d) or_return, } if nested, ok := t.value.(^Tag); ok { @@ -572,7 +615,7 @@ _decode_tag :: proc(r: io.Reader, add: Add) -> (v: Maybe(Tag), err: Decode_Error return t, nil } -_decode_tag_nr :: proc(r: io.Reader, add: Add) -> (nr: Tag_Number, err: Decode_Error) { +_decode_uint_as_u64 :: proc(r: io.Reader, add: Add) -> (nr: u64, err: Decode_Error) { #partial switch add { case .One_Byte: return u64(_decode_u8(r) or_return), nil case .Two_Bytes: return u64(_decode_u16(r) or_return), nil @@ -719,30 +762,50 @@ encode_stream_map_entry :: proc(e: Encoder, key: Value, val: Value) -> Encode_Er return encode(e, val) } -// - -_decode_container_length :: proc(r: io.Reader, add: Add) -> (length: Maybe(int), is_unknown: bool, err: Decode_Error) { - if add == Add.Length_Unknown { return nil, true, nil } - #partial switch add { - case .One_Byte: length = int(_decode_u8(r) or_return) - case .Two_Bytes: length = int(_decode_u16(r) or_return) - case .Four_Bytes: - big_length := _decode_u32(r) or_return - if u64(big_length) > u64(max(int)) { - err = .Length_Too_Big - return +// For `Bytes` and `Text` strings: Decodes the number of items the header says follows. +// If the number is not specified -1 is returned and streaming should be initiated. +// A suitable starting capacity is also returned for a buffer that is allocated up the stack. 
+_decode_len_str :: proc(d: Decoder, add: Add) -> (n: int, scap: int, err: Decode_Error) { + if add == .Length_Unknown { + if .Disallow_Streaming in d.flags { + return -1, -1, .Disallowed_Streaming } - length = int(big_length) - case .Eight_Bytes: - big_length := _decode_u64(r) or_return - if big_length > u64(max(int)) { - err = .Length_Too_Big - return - } - length = int(big_length) - case: - length = int(_decode_tiny_u8(add) or_return) + return -1, INITIAL_STREAMED_BYTES_CAPACITY, nil } + + _n := _decode_uint_as_u64(d.reader, add) or_return + if _n > u64(max(int)) { return -1, -1, .Length_Too_Big } + n = int(_n) + + scap = n + 1 // Space for zero byte. + if .Trusted_Input not_in d.flags { + scap = min(d.max_pre_alloc, scap) + } + + return +} + +// For `Array` and `Map` types: Decodes the number of items the header says follows. +// If the number is not specified -1 is returned and streaming should be initiated. +// A suitable starting capacity is also returned for a buffer that is allocated up the stack. +_decode_len_container :: proc(d: Decoder, add: Add) -> (n: int, scap: int, err: Decode_Error) { + if add == .Length_Unknown { + if .Disallow_Streaming in d.flags { + return -1, -1, .Disallowed_Streaming + } + return -1, INITIAL_STREAMED_CONTAINER_CAPACITY, nil + } + + _n := _decode_uint_as_u64(d.reader, add) or_return + if _n > u64(max(int)) { return -1, -1, .Length_Too_Big } + n = int(_n) + + scap = n + if .Trusted_Input not_in d.flags { + // NOTE: if this is a map it will be twice this. + scap = min(d.max_pre_alloc / size_of(Value), scap) + } + return } diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 509896d22..d2867e7be 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -55,7 +55,7 @@ Tag_Implementation :: struct { } // Procedure responsible for umarshalling the tag out of the reader into the given `any`. 
-Tag_Unmarshal_Proc :: #type proc(self: ^Tag_Implementation, r: io.Reader, tag_nr: Tag_Number, v: any) -> Unmarshal_Error +Tag_Unmarshal_Proc :: #type proc(self: ^Tag_Implementation, d: Decoder, tag_nr: Tag_Number, v: any) -> Unmarshal_Error // Procedure responsible for marshalling the tag in the given `any` into the given encoder. Tag_Marshal_Proc :: #type proc(self: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_Error @@ -121,30 +121,30 @@ tags_register_defaults :: proc() { // // See RFC 8949 section 3.4.2. @(private) -tag_time_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { - hdr := _decode_header(r) or_return +tag_time_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { + hdr := _decode_header(d.reader) or_return #partial switch hdr { case .U8, .U16, .U32, .U64, .Neg_U8, .Neg_U16, .Neg_U32, .Neg_U64: switch &dst in v { case time.Time: i: i64 - _unmarshal_any_ptr(r, &i, hdr) or_return + _unmarshal_any_ptr(d, &i, hdr) or_return dst = time.unix(i64(i), 0) return case: - return _unmarshal_value(r, v, hdr) + return _unmarshal_value(d, v, hdr) } case .F16, .F32, .F64: switch &dst in v { case time.Time: f: f64 - _unmarshal_any_ptr(r, &f, hdr) or_return + _unmarshal_any_ptr(d, &f, hdr) or_return whole, fract := math.modf(f) dst = time.unix(i64(whole), i64(fract * 1e9)) return case: - return _unmarshal_value(r, v, hdr) + return _unmarshal_value(d, v, hdr) } case: @@ -182,8 +182,8 @@ tag_time_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_ } @(private) -tag_big_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, tnr: Tag_Number, v: any) -> (err: Unmarshal_Error) { - hdr := _decode_header(r) or_return +tag_big_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, tnr: Tag_Number, v: any) -> (err: Unmarshal_Error) { + hdr := _decode_header(d.reader) or_return maj, add := _header_split(hdr) if maj != .Bytes { // Only bytes are 
supported in this tag. @@ -192,7 +192,7 @@ tag_big_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, tnr: Tag_Number, switch &dst in v { case big.Int: - bytes := err_conv(_decode_bytes(r, add)) or_return + bytes := err_conv(_decode_bytes(d, add)) or_return defer delete(bytes) if err := big.int_from_bytes_big(&dst, bytes); err != nil { @@ -246,13 +246,13 @@ tag_big_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_E } @(private) -tag_cbor_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> Unmarshal_Error { - hdr := _decode_header(r) or_return +tag_cbor_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, v: any) -> Unmarshal_Error { + hdr := _decode_header(d.reader) or_return major, add := _header_split(hdr) #partial switch major { case .Bytes: ti := reflect.type_info_base(type_info_of(v.id)) - return _unmarshal_bytes(r, v, ti, hdr, add) + return _unmarshal_bytes(d, v, ti, hdr, add) case: return .Bad_Tag_Value } @@ -283,8 +283,8 @@ tag_cbor_marshal :: proc(_: ^Tag_Implementation, e: Encoder, v: any) -> Marshal_ } @(private) -tag_base64_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { - hdr := _decode_header(r) or_return +tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, v: any) -> (err: Unmarshal_Error) { + hdr := _decode_header(d.reader) or_return major, add := _header_split(hdr) ti := reflect.type_info_base(type_info_of(v.id)) @@ -294,7 +294,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, r: io.Reader, _: Tag_Number bytes: string; { context.allocator = context.temp_allocator - bytes = string(err_conv(_decode_bytes(r, add)) or_return) + bytes = string(err_conv(_decode_bytes(d, add)) or_return) } defer delete(bytes, context.temp_allocator) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index c3ab6f908..2df99ca71 100644 --- a/core/encoding/cbor/unmarshal.odin +++ 
b/core/encoding/cbor/unmarshal.odin @@ -15,25 +15,56 @@ Types that require allocation are allocated using the given allocator. Some temporary allocations are done on the `context.temp_allocator`, but, if you want to, this can be set to a "normal" allocator, because the necessary `delete` and `free` calls are still made. This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end. + +Disable streaming/indeterminate lengths with the `.Disallow_Streaming` flag. + +Shrink excess bytes in buffers and containers with the `.Shrink_Excess` flag. + +Mark the input as trusted input with the `.Trusted_Input` flag, this turns off the safety feature +of not pre-allocating more than `max_pre_alloc` bytes before reading into the bytes. You should only +do this when you own both sides of the encoding and are sure there can't be malicious bytes used as +an input. */ unmarshal :: proc { unmarshal_from_reader, unmarshal_from_string, } -// Unmarshals from a reader, see docs on the proc group `Unmarshal` for more info. -unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, allocator := context.allocator) -> Unmarshal_Error { - return _unmarshal_any_ptr(r, ptr, allocator=allocator) +unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator) -> (err: Unmarshal_Error) { + err = unmarshal_from_decoder(Decoder{ DEFAULT_MAX_PRE_ALLOC, flags, r }, ptr, allocator=allocator) + + // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. + if err == .EOF { err = .Unexpected_EOF } + return } // Unmarshals from a string, see docs on the proc group `Unmarshal` for more info. 
-unmarshal_from_string :: proc(s: string, ptr: ^$T, allocator := context.allocator) -> Unmarshal_Error { +unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator) -> (err: Unmarshal_Error) { sr: strings.Reader r := strings.to_reader(&sr, s) - return _unmarshal_any_ptr(r, ptr, allocator=allocator) + + err = unmarshal_from_reader(r, ptr, flags, allocator) + + // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. + if err == .EOF { err = .Unexpected_EOF } + return } -_unmarshal_any_ptr :: proc(r: io.Reader, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator) -> Unmarshal_Error { +unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) { + d := d + if d.max_pre_alloc <= 0 { + d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC + } + + err = _unmarshal_any_ptr(d, ptr, allocator=allocator) + + // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. + if err == .EOF { err = .Unexpected_EOF } + return + +} + +_unmarshal_any_ptr :: proc(d: Decoder, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator) -> Unmarshal_Error { context.allocator = allocator v := v @@ -48,12 +79,13 @@ _unmarshal_any_ptr :: proc(r: io.Reader, v: any, hdr: Maybe(Header) = nil, alloc } data := any{(^rawptr)(v.data)^, ti.variant.(reflect.Type_Info_Pointer).elem.id} - return _unmarshal_value(r, data, hdr.? or_else (_decode_header(r) or_return)) + return _unmarshal_value(d, data, hdr.? 
or_else (_decode_header(d.reader) or_return)) } -_unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_Error) { +_unmarshal_value :: proc(d: Decoder, v: any, hdr: Header) -> (err: Unmarshal_Error) { v := v ti := reflect.type_info_base(type_info_of(v.id)) + r := d.reader // If it's a union with only one variant, then treat it as that variant if u, ok := ti.variant.(reflect.Type_Info_Union); ok && len(u.variants) == 1 { @@ -73,7 +105,7 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E // Allow generic unmarshal by doing it into a `Value`. switch &dst in v { case Value: - dst = err_conv(decode(r, hdr)) or_return + dst = err_conv(_decode_from_decoder(d, hdr)) or_return return } @@ -253,7 +285,7 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E case .Tag: switch &dst in v { case ^Tag: - tval := err_conv(_decode_tag_ptr(r, add)) or_return + tval := err_conv(_decode_tag_ptr(d, add)) or_return if t, is_tag := tval.(^Tag); is_tag { dst = t return @@ -262,7 +294,7 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E destroy(tval) return .Bad_Tag_Value case Tag: - t := err_conv(_decode_tag(r, add)) or_return + t := err_conv(_decode_tag(d, add)) or_return if t, is_tag := t.?; is_tag { dst = t return @@ -271,33 +303,33 @@ _unmarshal_value :: proc(r: io.Reader, v: any, hdr: Header) -> (err: Unmarshal_E return .Bad_Tag_Value } - nr := err_conv(_decode_tag_nr(r, add)) or_return + nr := err_conv(_decode_uint_as_u64(r, add)) or_return // Custom tag implementations. if impl, ok := _tag_implementations_nr[nr]; ok { - return impl->unmarshal(r, nr, v) + return impl->unmarshal(d, nr, v) } else if nr == TAG_OBJECT_TYPE { - return _unmarshal_union(r, v, ti, hdr) + return _unmarshal_union(d, v, ti, hdr) } else { // Discard the tag info and unmarshal as its value. 
- return _unmarshal_value(r, v, _decode_header(r) or_return) + return _unmarshal_value(d, v, _decode_header(r) or_return) } return _unsupported(v, hdr, add) - case .Bytes: return _unmarshal_bytes(r, v, ti, hdr, add) - case .Text: return _unmarshal_string(r, v, ti, hdr, add) - case .Array: return _unmarshal_array(r, v, ti, hdr, add) - case .Map: return _unmarshal_map(r, v, ti, hdr, add) + case .Bytes: return _unmarshal_bytes(d, v, ti, hdr, add) + case .Text: return _unmarshal_string(d, v, ti, hdr, add) + case .Array: return _unmarshal_array(d, v, ti, hdr, add) + case .Map: return _unmarshal_map(d, v, ti, hdr, add) case: return .Bad_Major } } -_unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { +_unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { #partial switch t in ti.variant { case reflect.Type_Info_String: - bytes := err_conv(_decode_bytes(r, add)) or_return + bytes := err_conv(_decode_bytes(d, add)) or_return if t.is_cstring { raw := (^cstring)(v.data) @@ -316,7 +348,7 @@ _unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head if elem_base.id != byte { return _unsupported(v, hdr) } - bytes := err_conv(_decode_bytes(r, add)) or_return + bytes := err_conv(_decode_bytes(d, add)) or_return raw := (^mem.Raw_Slice)(v.data) raw^ = transmute(mem.Raw_Slice)bytes return @@ -326,7 +358,7 @@ _unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head if elem_base.id != byte { return _unsupported(v, hdr) } - bytes := err_conv(_decode_bytes(r, add)) or_return + bytes := err_conv(_decode_bytes(d, add)) or_return raw := (^mem.Raw_Dynamic_Array)(v.data) raw.data = raw_data(bytes) raw.len = len(bytes) @@ -339,11 +371,9 @@ _unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head if elem_base.id != byte { return _unsupported(v, hdr) } - bytes: []byte; { - context.allocator = 
context.temp_allocator - bytes = err_conv(_decode_bytes(r, add)) or_return - } - defer delete(bytes, context.temp_allocator) + context.allocator = context.temp_allocator + bytes := err_conv(_decode_bytes(d, add)) or_return + defer delete(bytes) if len(bytes) > t.count { return _unsupported(v, hdr) } @@ -357,10 +387,10 @@ _unmarshal_bytes :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head return _unsupported(v, hdr) } -_unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { +_unmarshal_string :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { #partial switch t in ti.variant { case reflect.Type_Info_String: - text := err_conv(_decode_text(r, add)) or_return + text := err_conv(_decode_text(d, add)) or_return if t.is_cstring { raw := (^cstring)(v.data) @@ -376,8 +406,8 @@ _unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Hea // Enum by its variant name. 
case reflect.Type_Info_Enum: context.allocator = context.temp_allocator - text := err_conv(_decode_text(r, add)) or_return - defer delete(text, context.temp_allocator) + text := err_conv(_decode_text(d, add)) or_return + defer delete(text) for name, i in t.names { if name == text { @@ -388,8 +418,8 @@ _unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Hea case reflect.Type_Info_Rune: context.allocator = context.temp_allocator - text := err_conv(_decode_text(r, add)) or_return - defer delete(text, context.temp_allocator) + text := err_conv(_decode_text(d, add)) or_return + defer delete(text) r := (^rune)(v.data) dr, n := utf8.decode_rune(text) @@ -404,21 +434,19 @@ _unmarshal_string :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Hea return _unsupported(v, hdr) } -_unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { - +_unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { assign_array :: proc( - r: io.Reader, + d: Decoder, da: ^mem.Raw_Dynamic_Array, elemt: ^reflect.Type_Info, - _length: Maybe(int), + length: int, growable := true, ) -> (out_of_space: bool, err: Unmarshal_Error) { - length, has_length := _length.? - for idx: uintptr = 0; !has_length || idx < uintptr(length); idx += 1 { + for idx: uintptr = 0; length == -1 || idx < uintptr(length); idx += 1 { elem_ptr := rawptr(uintptr(da.data) + idx*uintptr(elemt.size)) elem := any{elem_ptr, elemt.id} - hdr := _decode_header(r) or_return + hdr := _decode_header(d.reader) or_return // Double size if out of capacity. 
if da.cap <= da.len { @@ -432,8 +460,8 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head if !ok { return false, .Out_Of_Memory } } - err = _unmarshal_value(r, elem, hdr) - if !has_length && err == .Break { break } + err = _unmarshal_value(d, elem, hdr) + if length == -1 && err == .Break { break } if err != nil { return } da.len += 1 @@ -445,26 +473,25 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head // Allow generically storing the values array. switch &dst in v { case ^Array: - dst = err_conv(_decode_array_ptr(r, add)) or_return + dst = err_conv(_decode_array_ptr(d, add)) or_return return case Array: - dst = err_conv(_decode_array(r, add)) or_return + dst = err_conv(_decode_array(d, add)) or_return return } #partial switch t in ti.variant { case reflect.Type_Info_Slice: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? or_else INITIAL_STREAMED_CONTAINER_CAPACITY + length, scap := err_conv(_decode_len_container(d, add)) or_return - data := mem.alloc_bytes_non_zeroed(t.elem.size * length, t.elem.align) or_return + data := mem.alloc_bytes_non_zeroed(t.elem.size * scap, t.elem.align) or_return defer if err != nil { mem.free_bytes(data) } da := mem.Raw_Dynamic_Array{raw_data(data), 0, length, context.allocator } - assign_array(r, &da, t.elem, _length) or_return + assign_array(d, &da, t.elem, length) or_return - if da.len < da.cap { + if .Shrink_Excess in d.flags { // Ignoring an error here, but this is not critical to succeed. _ = runtime.__dynamic_array_shrink(&da, t.elem.size, t.elem.align, da.len) } @@ -475,54 +502,58 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head return case reflect.Type_Info_Dynamic_Array: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? 
or_else INITIAL_STREAMED_CONTAINER_CAPACITY + length, scap := err_conv(_decode_len_container(d, add)) or_return - data := mem.alloc_bytes_non_zeroed(t.elem.size * length, t.elem.align) or_return + data := mem.alloc_bytes_non_zeroed(t.elem.size * scap, t.elem.align) or_return defer if err != nil { mem.free_bytes(data) } - raw := (^mem.Raw_Dynamic_Array)(v.data) - raw.data = raw_data(data) - raw.len = 0 - raw.cap = length - raw.allocator = context.allocator + raw := (^mem.Raw_Dynamic_Array)(v.data) + raw.data = raw_data(data) + raw.len = 0 + raw.cap = length + raw.allocator = context.allocator - _ = assign_array(r, raw, t.elem, _length) or_return + _ = assign_array(d, raw, t.elem, length) or_return + + if .Shrink_Excess in d.flags { + // Ignoring an error here, but this is not critical to succeed. + _ = runtime.__dynamic_array_shrink(raw, t.elem.size, t.elem.align, raw.len) + } return case reflect.Type_Info_Array: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? or_else t.count + _length, scap := err_conv(_decode_len_container(d, add)) or_return + length := min(scap, t.count) - if !unknown && length > t.count { + if length > t.count { return _unsupported(v, hdr) } da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, context.allocator } - out_of_space := assign_array(r, &da, t.elem, _length, growable=false) or_return + out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return if out_of_space { return _unsupported(v, hdr) } return case reflect.Type_Info_Enumerated_Array: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? 
or_else t.count + _length, scap := err_conv(_decode_len_container(d, add)) or_return + length := min(scap, t.count) - if !unknown && length > t.count { + if length > t.count { return _unsupported(v, hdr) } da := mem.Raw_Dynamic_Array{rawptr(v.data), 0, length, context.allocator } - out_of_space := assign_array(r, &da, t.elem, _length, growable=false) or_return + out_of_space := assign_array(d, &da, t.elem, length, growable=false) or_return if out_of_space { return _unsupported(v, hdr) } return case reflect.Type_Info_Complex: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? or_else 2 + _length, scap := err_conv(_decode_len_container(d, add)) or_return + length := min(scap, 2) - if !unknown && length > 2 { + if length > 2 { return _unsupported(v, hdr) } @@ -536,15 +567,15 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head case: unreachable() } - out_of_space := assign_array(r, &da, info, 2, growable=false) or_return + out_of_space := assign_array(d, &da, info, 2, growable=false) or_return if out_of_space { return _unsupported(v, hdr) } return case reflect.Type_Info_Quaternion: - _length, unknown := err_conv(_decode_container_length(r, add)) or_return - length := _length.? 
or_else 4 + _length, scap := err_conv(_decode_len_container(d, add)) or_return + length := min(scap, 4) - if !unknown && length > 4 { + if length > 4 { return _unsupported(v, hdr) } @@ -558,7 +589,7 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head case: unreachable() } - out_of_space := assign_array(r, &da, info, 4, growable=false) or_return + out_of_space := assign_array(d, &da, info, 4, growable=false) or_return if out_of_space { return _unsupported(v, hdr) } return @@ -566,17 +597,17 @@ _unmarshal_array :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head } } -_unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { - - decode_key :: proc(r: io.Reader, v: any) -> (k: string, err: Unmarshal_Error) { - entry_hdr := _decode_header(r) or_return +_unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { + r := d.reader + decode_key :: proc(d: Decoder, v: any) -> (k: string, err: Unmarshal_Error) { + entry_hdr := _decode_header(d.reader) or_return entry_maj, entry_add := _header_split(entry_hdr) #partial switch entry_maj { case .Text: - k = err_conv(_decode_text(r, entry_add)) or_return + k = err_conv(_decode_text(d, entry_add)) or_return return case .Bytes: - bytes := err_conv(_decode_bytes(r, entry_add)) or_return + bytes := err_conv(_decode_bytes(d, entry_add)) or_return k = string(bytes) return case: @@ -588,10 +619,10 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header // Allow generically storing the map array. 
switch &dst in v { case ^Map: - dst = err_conv(_decode_map_ptr(r, add)) or_return + dst = err_conv(_decode_map_ptr(d, add)) or_return return case Map: - dst = err_conv(_decode_map(r, add)) or_return + dst = err_conv(_decode_map(d, add)) or_return return } @@ -601,14 +632,15 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header return _unsupported(v, hdr) } - length, unknown := err_conv(_decode_container_length(r, add)) or_return + length, scap := err_conv(_decode_len_container(d, add)) or_return + unknown := length == -1 fields := reflect.struct_fields_zipped(ti.id) - for idx := 0; unknown || idx < length.?; idx += 1 { + for idx := 0; idx < len(fields) && (unknown || idx < length); idx += 1 { // Decode key, keys can only be strings. key: string; { context.allocator = context.temp_allocator - if keyv, kerr := decode_key(r, v); unknown && kerr == .Break { + if keyv, kerr := decode_key(d, v); unknown && kerr == .Break { break } else if kerr != nil { err = kerr @@ -641,11 +673,11 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header } } - field := fields[use_field_idx] - name := field.name - ptr := rawptr(uintptr(v.data) + field.offset) - fany := any{ptr, field.type.id} - _unmarshal_value(r, fany, _decode_header(r) or_return) or_return + field := fields[use_field_idx] + name := field.name + ptr := rawptr(uintptr(v.data) + field.offset) + fany := any{ptr, field.type.id} + _unmarshal_value(d, fany, _decode_header(r) or_return) or_return } return @@ -654,6 +686,8 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header return _unsupported(v, hdr) } + // TODO: shrink excess. 
+ raw_map := (^mem.Raw_Map)(v.data) if raw_map.allocator.procedure == nil { raw_map.allocator = context.allocator @@ -663,10 +697,11 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header _ = runtime.map_free_dynamic(raw_map^, t.map_info) } - length, unknown := err_conv(_decode_container_length(r, add)) or_return + length, scap := err_conv(_decode_len_container(d, add)) or_return + unknown := length == -1 if !unknown { // Reserve space before setting so we can return allocation errors and be efficient on big maps. - new_len := uintptr(runtime.map_len(raw_map^)+length.?) + new_len := uintptr(min(scap, runtime.map_len(raw_map^)+length)) runtime.map_reserve_dynamic(raw_map, t.map_info, new_len) or_return } @@ -676,10 +711,10 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header map_backing_value := any{raw_data(elem_backing), t.value.id} - for idx := 0; unknown || idx < length.?; idx += 1 { + for idx := 0; unknown || idx < length; idx += 1 { // Decode key, keys can only be strings. key: string - if keyv, kerr := decode_key(r, v); unknown && kerr == .Break { + if keyv, kerr := decode_key(d, v); unknown && kerr == .Break { break } else if kerr != nil { err = kerr @@ -688,14 +723,14 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header key = keyv } - if unknown { + if unknown || idx > scap { // Reserve space for new element so we can return allocator errors. new_len := uintptr(runtime.map_len(raw_map^)+1) runtime.map_reserve_dynamic(raw_map, t.map_info, new_len) or_return } mem.zero_slice(elem_backing) - _unmarshal_value(r, map_backing_value, _decode_header(r) or_return) or_return + _unmarshal_value(d, map_backing_value, _decode_header(r) or_return) or_return key_ptr := rawptr(&key) key_cstr: cstring @@ -709,6 +744,10 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header // We already reserved space for it, so this shouldn't fail. 
assert(set_ptr != nil) } + + if .Shrink_Excess in d.flags { + _, _ = runtime.map_shrink_dynamic(raw_map, t.map_info) + } return case: @@ -719,7 +758,8 @@ _unmarshal_map :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header // Unmarshal into a union, based on the `TAG_OBJECT_TYPE` tag of the spec, it denotes a tag which // contains an array of exactly two elements, the first is a textual representation of the following // CBOR value's type. -_unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Header) -> (err: Unmarshal_Error) { +_unmarshal_union :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header) -> (err: Unmarshal_Error) { + r := d.reader #partial switch t in ti.variant { case reflect.Type_Info_Union: idhdr: Header @@ -731,8 +771,8 @@ _unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head return .Bad_Tag_Value } - n_items, unknown := err_conv(_decode_container_length(r, vadd)) or_return - if unknown || n_items != 2 { + n_items, _ := err_conv(_decode_len_container(d, vadd)) or_return + if n_items != 2 { return .Bad_Tag_Value } @@ -743,7 +783,7 @@ _unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head } context.allocator = context.temp_allocator - target_name = err_conv(_decode_text(r, idadd)) or_return + target_name = err_conv(_decode_text(d, idadd)) or_return } defer delete(target_name, context.temp_allocator) @@ -757,7 +797,7 @@ _unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head case reflect.Type_Info_Named: if vti.name == target_name { reflect.set_union_variant_raw_tag(v, tag) - return _unmarshal_value(r, any{v.data, variant.id}, _decode_header(r) or_return) + return _unmarshal_value(d, any{v.data, variant.id}, _decode_header(r) or_return) } case: @@ -769,7 +809,7 @@ _unmarshal_union :: proc(r: io.Reader, v: any, ti: ^reflect.Type_Info, hdr: Head if variant_name == target_name { reflect.set_union_variant_raw_tag(v, tag) - return 
_unmarshal_value(r, any{v.data, variant.id}, _decode_header(r) or_return) + return _unmarshal_value(d, any{v.data, variant.id}, _decode_header(r) or_return) } } } diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 06b96c915..23bfbd3d8 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -4,6 +4,7 @@ import "core:bytes" import "core:encoding/cbor" import "core:fmt" import "core:intrinsics" +import "core:io" import "core:math/big" import "core:mem" import "core:os" @@ -61,7 +62,9 @@ main :: proc() { test_marshalling_maybe(&t) test_marshalling_nil_maybe(&t) - test_cbor_marshalling_union(&t) + test_marshalling_union(&t) + + test_lying_length_array(&t) test_decode_unsigned(&t) test_encode_unsigned(&t) @@ -202,7 +205,7 @@ test_marshalling :: proc(t: ^testing.T) { ev(t, err, nil) defer delete(data) - decoded, derr := cbor.decode_string(string(data)) + decoded, derr := cbor.decode(string(data)) ev(t, derr, nil) defer cbor.destroy(decoded) @@ -398,7 +401,7 @@ test_marshalling_nil_maybe :: proc(t: ^testing.T) { } @(test) -test_cbor_marshalling_union :: proc(t: ^testing.T) { +test_marshalling_union :: proc(t: ^testing.T) { My_Distinct :: distinct string My_Enum :: enum { @@ -457,6 +460,14 @@ test_cbor_marshalling_union :: proc(t: ^testing.T) { } } +@(test) +test_lying_length_array :: proc(t: ^testing.T) { + // Input says this is an array of length max(u64), this should not allocate that amount. + input := []byte{0x9B, 0x00, 0x00, 0x42, 0xFA, 0x42, 0xFA, 0x42, 0xFA, 0x42} + _, err := cbor.decode(string(input)) + expect_value(t, err, io.Error.Unexpected_EOF) // .Out_Of_Memory would be bad. 
+} + @(test) test_decode_unsigned :: proc(t: ^testing.T) { expect_decoding(t, "\x00", "0", u8) From 7283b5e75ccecf7dbf28072456a137b29ff983af Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 16 Dec 2023 23:44:40 +0100 Subject: [PATCH 06/66] encoding/cbor: minor things --- core/encoding/cbor/cbor.odin | 6 +++++- core/encoding/cbor/tags.odin | 2 +- core/encoding/cbor/unmarshal.odin | 2 -- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 9c4bb0e4e..ddbd53c8d 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -1,3 +1,7 @@ +// Package cbor encodes, decodes, marshals and unmarshals types from/into RCF 8949 compatible CBOR binary. +// Also provided are conversion to and from JSON and the CBOR diagnostic format. +// +// You can additionally provide custom CBOR tag implementations for your use cases. package cbor import "core:encoding/json" @@ -163,7 +167,7 @@ Decode_Data_Error :: enum { Nested_Tag, // When a tag's value is another tag, this is not allowed. Length_Too_Big, // When the length of a container (map, array, bytes, string) is more than `max(int)`. Disallowed_Streaming, // When the `.Disallow_Streaming` flag is set and a streaming header is encountered. - Break, + Break, // When the `break` header was found without any stream to break off. } Encode_Data_Error :: enum { diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index d2867e7be..cdb7227ef 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -89,7 +89,7 @@ tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string // Controls initialization of default tag implementations. // JS and WASI default to a panic allocator so we don't want to do it on those. 
-INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, ODIN_OS != .JS && ODIN_OS != .WASI) +INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, !ODIN_DEFAULT_TO_NIL_ALLOCATOR && ODIN_OS != .JS && ODIN_OS != .WASI) @(private, init, disabled=!INITIALIZE_DEFAULT_TAGS) tags_initialize_defaults :: proc() { diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 2df99ca71..dea4b749c 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -686,8 +686,6 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, return _unsupported(v, hdr) } - // TODO: shrink excess. - raw_map := (^mem.Raw_Map)(v.data) if raw_map.allocator.procedure == nil { raw_map.allocator = context.allocator From 46b58ad48d2e326c9592654e96efdf2e927dc876 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 20 Dec 2023 14:29:34 +0100 Subject: [PATCH 07/66] encoding/cbor: don't zero bytes we are going to write/read to/from anyway --- core/encoding/cbor/coding.odin | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index e39519e01..5d99aa6d2 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -266,7 +266,7 @@ encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { } _decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { - buf: [1]byte + buf: [1]byte = --- io.read_full(r, buf[:]) or_return return Header(buf[0]), nil } @@ -276,7 +276,7 @@ _header_split :: proc(hdr: Header) -> (Major, Add) { } _decode_u8 :: proc(r: io.Reader) -> (v: u8, err: io.Error) { - byte: [1]byte + byte: [1]byte = --- io.read_full(r, byte[:]) or_return return byte[0], nil } @@ -310,7 +310,7 @@ _decode_tiny_u8 :: proc(additional: Add) -> (u8, Decode_Data_Error) { } _decode_u16 :: proc(r: io.Reader) -> (v: u16, err: io.Error) { - bytes: [2]byte + bytes: 
[2]byte = --- io.read_full(r, bytes[:]) or_return return endian.unchecked_get_u16be(bytes[:]), nil } @@ -323,7 +323,7 @@ _encode_u16 :: proc(e: Encoder, v: u16, major: Major = .Unsigned) -> Encode_Erro } _encode_u16_exact :: proc(w: io.Writer, v: u16, major: Major = .Unsigned) -> (err: io.Error) { - bytes: [3]byte + bytes: [3]byte = --- bytes[0] = (u8(major) << 5) | u8(Add.Two_Bytes) endian.unchecked_put_u16be(bytes[1:], v) _, err = io.write_full(w, bytes[:]) @@ -331,7 +331,7 @@ _encode_u16_exact :: proc(w: io.Writer, v: u16, major: Major = .Unsigned) -> (er } _decode_u32 :: proc(r: io.Reader) -> (v: u32, err: io.Error) { - bytes: [4]byte + bytes: [4]byte = --- io.read_full(r, bytes[:]) or_return return endian.unchecked_get_u32be(bytes[:]), nil } @@ -344,7 +344,7 @@ _encode_u32 :: proc(e: Encoder, v: u32, major: Major = .Unsigned) -> Encode_Erro } _encode_u32_exact :: proc(w: io.Writer, v: u32, major: Major = .Unsigned) -> (err: io.Error) { - bytes: [5]byte + bytes: [5]byte = --- bytes[0] = (u8(major) << 5) | u8(Add.Four_Bytes) endian.unchecked_put_u32be(bytes[1:], v) _, err = io.write_full(w, bytes[:]) @@ -352,7 +352,7 @@ _encode_u32_exact :: proc(w: io.Writer, v: u32, major: Major = .Unsigned) -> (er } _decode_u64 :: proc(r: io.Reader) -> (v: u64, err: io.Error) { - bytes: [8]byte + bytes: [8]byte = --- io.read_full(r, bytes[:]) or_return return endian.unchecked_get_u64be(bytes[:]), nil } @@ -365,7 +365,7 @@ _encode_u64 :: proc(e: Encoder, v: u64, major: Major = .Unsigned) -> Encode_Erro } _encode_u64_exact :: proc(w: io.Writer, v: u64, major: Major = .Unsigned) -> (err: io.Error) { - bytes: [9]byte + bytes: [9]byte = --- bytes[0] = (u8(major) << 5) | u8(Add.Eight_Bytes) endian.unchecked_put_u64be(bytes[1:], v) _, err = io.write_full(w, bytes[:]) @@ -556,7 +556,7 @@ _encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) { for &entry, i in entries { entry.entry = m[i] - buf := strings.builder_make(0, 8, context.temp_allocator) or_return + buf := 
strings.builder_make(context.temp_allocator) or_return ke := e ke.writer = strings.to_stream(&buf) @@ -631,7 +631,7 @@ _encode_tag :: proc(e: Encoder, val: Tag) -> Encode_Error { } _decode_simple :: proc(r: io.Reader) -> (v: Simple, err: io.Error) { - buf: [1]byte + buf: [1]byte = --- io.read_full(r, buf[:]) or_return return Simple(buf[0]), nil } @@ -661,14 +661,14 @@ _decode_tiny_simple :: proc(add: Add) -> (Simple, Decode_Data_Error) { } _decode_f16 :: proc(r: io.Reader) -> (v: f16, err: io.Error) { - bytes: [2]byte + bytes: [2]byte = --- io.read_full(r, bytes[:]) or_return n := endian.unchecked_get_u16be(bytes[:]) return transmute(f16)n, nil } _encode_f16 :: proc(w: io.Writer, v: f16) -> (err: io.Error) { - bytes: [3]byte + bytes: [3]byte = --- bytes[0] = u8(Header.F16) endian.unchecked_put_u16be(bytes[1:], transmute(u16)v) _, err = io.write_full(w, bytes[:]) @@ -676,7 +676,7 @@ _encode_f16 :: proc(w: io.Writer, v: f16) -> (err: io.Error) { } _decode_f32 :: proc(r: io.Reader) -> (v: f32, err: io.Error) { - bytes: [4]byte + bytes: [4]byte = --- io.read_full(r, bytes[:]) or_return n := endian.unchecked_get_u32be(bytes[:]) return transmute(f32)n, nil @@ -690,7 +690,7 @@ _encode_f32 :: proc(e: Encoder, v: f32) -> io.Error { } _encode_f32_exact :: proc(w: io.Writer, v: f32) -> (err: io.Error) { - bytes: [5]byte + bytes: [5]byte = --- bytes[0] = u8(Header.F32) endian.unchecked_put_u32be(bytes[1:], transmute(u32)v) _, err = io.write_full(w, bytes[:]) @@ -698,7 +698,7 @@ _encode_f32_exact :: proc(w: io.Writer, v: f32) -> (err: io.Error) { } _decode_f64 :: proc(r: io.Reader) -> (v: f64, err: io.Error) { - bytes: [8]byte + bytes: [8]byte = --- io.read_full(r, bytes[:]) or_return n := endian.unchecked_get_u64be(bytes[:]) return transmute(f64)n, nil @@ -712,7 +712,7 @@ _encode_f64 :: proc(e: Encoder, v: f64) -> io.Error { } _encode_f64_exact :: proc(w: io.Writer, v: f64) -> (err: io.Error) { - bytes: [9]byte + bytes: [9]byte = --- bytes[0] = u8(Header.F64) 
endian.unchecked_put_u64be(bytes[1:], transmute(u64)v) _, err = io.write_full(w, bytes[:]) From cb8bb8bfd8df311f13d40bfc19018f70e105a1cf Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 20 Dec 2023 15:29:55 +0100 Subject: [PATCH 08/66] encoding/cbor: cleanup default temp allocator --- core/encoding/cbor/cbor.odin | 5 ++ core/encoding/cbor/coding.odin | 87 ++++++++++++++++++++++++------- core/encoding/cbor/marshal.odin | 15 +----- core/encoding/cbor/unmarshal.odin | 5 +- 4 files changed, 77 insertions(+), 35 deletions(-) diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index ddbd53c8d..9df4dfa51 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -238,6 +238,7 @@ negative_u64_to_int :: #force_inline proc(u: Negative_U64) -> i128 { // Utility for converting between the different errors when they are subsets of the other. err_conv :: proc { encode_to_marshal_err, + encode_to_marshal_err_p2, decode_to_unmarshal_err, decode_to_unmarshal_err_p, decode_to_unmarshal_err_p2, @@ -253,6 +254,10 @@ encode_to_marshal_err :: #force_inline proc(err: Encode_Error) -> Marshal_Error } } +encode_to_marshal_err_p2 :: #force_inline proc(v: $T, v2: $T2, err: Encode_Error) -> (T, T2, Marshal_Error) { + return v, v2, err_conv(err) +} + decode_to_unmarshal_err :: #force_inline proc(err: Decode_Error) -> Unmarshal_Error { switch e in err { case nil: return nil diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 5d99aa6d2..1e77a35c8 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -4,6 +4,7 @@ import "core:bytes" import "core:encoding/endian" import "core:intrinsics" import "core:io" +import "core:runtime" import "core:slice" import "core:strings" @@ -54,6 +55,9 @@ Decoder_Flag :: enum { // Makes the decoder shrink of excess capacity from allocated buffers/containers before returning. Shrink_Excess, + + // Internal flag to do initialization. 
+ _In_Progress, } Decoder_Flags :: bit_set[Decoder_Flag] @@ -117,9 +121,8 @@ decode_from_decoder :: proc(d: Decoder, allocator := context.allocator) -> (v: V context.allocator = allocator d := d - if d.max_pre_alloc <= 0 { - d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC - } + + DECODE_PROGRESS_GUARD(&d) v, err = _decode_from_decoder(d) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. @@ -225,21 +228,9 @@ encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Enc // See the docs on the proc group `encode_into` for more info. encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { e := e + + ENCODE_PROGRESS_GUARD(&e) or_return - outer: bool - defer if outer { - e.flags &~= {._In_Progress} - } - - if ._In_Progress not_in e.flags { - outer = true - e.flags |= {._In_Progress} - - if .Self_Described_CBOR in e.flags { - _encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return - } - } - switch v_spec in v { case u8: return _encode_u8(e.writer, v_spec, .Unsigned) case u16: return _encode_u16(e, v_spec, .Unsigned) @@ -265,6 +256,66 @@ encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { } } +@(deferred_in_out=_decode_progress_end) +DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) { + if ._In_Progress in d.flags { + return + } + is_begin = true + + incl_elem(&d.flags, Decoder_Flag._In_Progress) + + if context.allocator != context.temp_allocator { + tmp = runtime.default_temp_allocator_temp_begin() + } + + if d.max_pre_alloc <= 0 { + d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC + } + + return +} + +_decode_progress_end :: proc(d: ^Decoder, is_begin: bool, tmp: runtime.Arena_Temp) { + if !is_begin { + return + } + + excl_elem(&d.flags, Decoder_Flag._In_Progress) + + runtime.default_temp_allocator_temp_end(tmp) +} + +@(deferred_in_out=_encode_progress_end) +ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: 
Encode_Error) { + if ._In_Progress in e.flags { + return + } + is_begin = true + + incl_elem(&e.flags, Encoder_Flag._In_Progress) + + if context.allocator != context.temp_allocator { + tmp = runtime.default_temp_allocator_temp_begin() + } + + if .Self_Described_CBOR in e.flags { + _encode_u64(e^, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return + } + + return +} + +_encode_progress_end :: proc(e: ^Encoder, is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { + if !is_begin || err != nil { + return + } + + excl_elem(&e.flags, Encoder_Flag._In_Progress) + + runtime.default_temp_allocator_temp_end(tmp) +} + _decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { buf: [1]byte = --- io.read_full(r, buf[:]) or_return @@ -514,7 +565,7 @@ _decode_map :: proc(d: Decoder, add: Add) -> (v: Map, err: Decode_Error) { return nil, kerr } - value := decode_from_decoder(d) or_return + value := _decode_from_decoder(d) or_return append(&items, Map_Entry{ key = key, diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index a5d5efb3e..898371adf 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -77,21 +77,8 @@ marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Mars // See docs on the `marshal_into` proc group for more info. marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { e := e - - init: bool - defer if init { - e.flags &~= {._In_Progress} - } - - // If not in progress we do initialization and set in progress. 
- if ._In_Progress not_in e.flags { - init = true - e.flags |= {._In_Progress} - if .Self_Described_CBOR in e.flags { - err_conv(_encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag)) or_return - } - } + err_conv(ENCODE_PROGRESS_GUARD(&e)) or_return if v == nil { return _encode_nil(e.writer) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index dea4b749c..c7de2d87a 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -52,9 +52,8 @@ unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, all unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) { d := d - if d.max_pre_alloc <= 0 { - d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC - } + + DECODE_PROGRESS_GUARD(&d) err = _unmarshal_any_ptr(d, ptr, allocator=allocator) From 85f1a60cf301abab292e1dab65e19c61c5612e8e Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 20 Dec 2023 16:08:27 +0100 Subject: [PATCH 09/66] encoding/cbor: cleanup comments about tags --- core/encoding/cbor/tags.odin | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index cdb7227ef..38649f634 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -13,29 +13,35 @@ import "core:time" // Tags defined in RFC 7049 that we provide implementations for. // UTC time in seconds, unmarshalled into a `core:time` `time.Time` or integer. +// Use the struct tag `cbor_tag:"1"` or `cbor_tag:"epoch"` to have your `time.Time` field en/decoded as epoch time. TAG_EPOCH_TIME_NR :: 1 TAG_EPOCH_TIME_ID :: "epoch" // Using `core:math/big`, big integers are properly encoded and decoded during marshal and unmarshal. -TAG_UNSIGNED_BIG_NR :: 2 +// These fields use this tag by default, no struct tag required. 
+TAG_UNSIGNED_BIG_NR :: 2 // Using `core:math/big`, big integers are properly encoded and decoded during marshal and unmarshal. -TAG_NEGATIVE_BIG_NR :: 3 +// These fields use this tag by default, no struct tag required. +TAG_NEGATIVE_BIG_NR :: 3 // TAG_DECIMAL_FRACTION :: 4 // NOTE: We could probably implement this with `math/fixed`. // Sometimes it is beneficial to carry an embedded CBOR data item that is not meant to be decoded // immediately at the time the enclosing data item is being decoded. Tag number 24 (CBOR data item) // can be used to tag the embedded byte string as a single data item encoded in CBOR format. +// Use the struct tag `cbor_tag:"24"` or `cbor_tag:"cbor"` to keep a non-decoded field (string or bytes) of raw CBOR. TAG_CBOR_NR :: 24 TAG_CBOR_ID :: "cbor" // The contents of this tag are base64 encoded during marshal and decoded during unmarshal. +// Use the struct tag `cbor_tag:"34"` or `cbor_tag:"base64"` to have your field string or bytes field en/decoded as base64. TAG_BASE64_NR :: 34 TAG_BASE64_ID :: "base64" // A tag that is used to detect the contents of a binary buffer (like a file) are CBOR. // This tag would wrap everything else, decoders can then check for this header and see if the // given content is definitely CBOR. +// Added by the encoder if it has the flag `.Self_Described_CBOR`, decoded by default. TAG_SELF_DESCRIBED_CBOR :: 55799 // A tag that is used to assign a textual type to the object following it. @@ -99,19 +105,14 @@ tags_initialize_defaults :: proc() { // Registers tags that have implementations provided by this package. // This is done by default and can be controlled with the `CBOR_INITIALIZE_DEFAULT_TAGS` define. tags_register_defaults :: proc() { - // NOTE: Not registering this the other way around, user can opt-in using the `cbor_tag:"1"` struct - // tag instead, it would lose precision and marshalling the `time.Time` struct normally is valid. 
- tag_register_number({nil, tag_time_unmarshal, tag_time_marshal}, TAG_EPOCH_TIME_NR, TAG_EPOCH_TIME_ID) - - // Use the struct tag `cbor_tag:"34"` to have your field encoded in a base64. - tag_register_number({nil, tag_base64_unmarshal, tag_base64_marshal}, TAG_BASE64_NR, TAG_BASE64_ID) - - // Use the struct tag `cbor_tag:"24"` to keep a non-decoded field of raw CBOR. - tag_register_number({nil, tag_cbor_unmarshal, tag_cbor_marshal}, TAG_CBOR_NR, TAG_CBOR_ID) + tag_register_number({nil, tag_time_unmarshal, tag_time_marshal}, TAG_EPOCH_TIME_NR, TAG_EPOCH_TIME_ID) + tag_register_number({nil, tag_base64_unmarshal, tag_base64_marshal}, TAG_BASE64_NR, TAG_BASE64_ID) + tag_register_number({nil, tag_cbor_unmarshal, tag_cbor_marshal}, TAG_CBOR_NR, TAG_CBOR_ID) // These following tags are registered at the type level and don't require an opt-in struct tag. // Encoding these types on its own make no sense or no data is lost to encode it. - + + // En/Decoding of `big.Int` fields by default. tag_register_type({nil, tag_big_unmarshal, tag_big_marshal}, TAG_UNSIGNED_BIG_NR, big.Int) tag_register_type({nil, tag_big_unmarshal, tag_big_marshal}, TAG_NEGATIVE_BIG_NR, big.Int) } From 3fccc77829d6479b972026c5fee7ef0f34ac589e Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 20 Dec 2023 16:20:33 +0100 Subject: [PATCH 10/66] encoding/cbor: clean and fixup some allocations --- core/encoding/cbor/coding.odin | 8 +++--- core/encoding/cbor/tags.odin | 5 +--- core/encoding/cbor/unmarshal.odin | 44 +++++++++++++------------------ 3 files changed, 25 insertions(+), 32 deletions(-) diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 1e77a35c8..32ecf52bc 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -431,7 +431,9 @@ _decode_bytes_ptr :: proc(d: Decoder, add: Add, type: Major = .Bytes) -> (v: ^By return } -_decode_bytes :: proc(d: Decoder, add: Add, type: Major = .Bytes) -> (v: Bytes, err: Decode_Error) { +_decode_bytes :: 
proc(d: Decoder, add: Add, type: Major = .Bytes, allocator := context.allocator) -> (v: Bytes, err: Decode_Error) { + context.allocator = allocator + n, scap := _decode_len_str(d, add) or_return buf := strings.builder_make(0, scap) or_return @@ -487,8 +489,8 @@ _decode_text_ptr :: proc(d: Decoder, add: Add) -> (v: ^Text, err: Decode_Error) return } -_decode_text :: proc(d: Decoder, add: Add) -> (v: Text, err: Decode_Error) { - return (Text)(_decode_bytes(d, add, .Text) or_return), nil +_decode_text :: proc(d: Decoder, add: Add, allocator := context.temp_allocator) -> (v: Text, err: Decode_Error) { + return (Text)(_decode_bytes(d, add, .Text, allocator) or_return), nil } _encode_text :: proc(e: Encoder, val: Text) -> Encode_Error { diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 38649f634..efe724f8c 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -293,10 +293,7 @@ tag_base64_unmarshal :: proc(_: ^Tag_Implementation, d: Decoder, _: Tag_Number, return .Bad_Tag_Value } - bytes: string; { - context.allocator = context.temp_allocator - bytes = string(err_conv(_decode_bytes(d, add)) or_return) - } + bytes := string(err_conv(_decode_bytes(d, add, allocator=context.temp_allocator)) or_return) defer delete(bytes, context.temp_allocator) #partial switch t in ti.variant { diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index c7de2d87a..ae7f97c98 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -370,9 +370,8 @@ _unmarshal_bytes :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header if elem_base.id != byte { return _unsupported(v, hdr) } - context.allocator = context.temp_allocator - bytes := err_conv(_decode_bytes(d, add)) or_return - defer delete(bytes) + bytes := err_conv(_decode_bytes(d, add, allocator=context.temp_allocator)) or_return + defer delete(bytes, context.temp_allocator) if len(bytes) > t.count { return _unsupported(v, 
hdr) } @@ -404,9 +403,8 @@ _unmarshal_string :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Heade // Enum by its variant name. case reflect.Type_Info_Enum: - context.allocator = context.temp_allocator - text := err_conv(_decode_text(d, add)) or_return - defer delete(text) + text := err_conv(_decode_text(d, add, allocator=context.temp_allocator)) or_return + defer delete(text, context.temp_allocator) for name, i in t.names { if name == text { @@ -416,9 +414,8 @@ _unmarshal_string :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Heade } case reflect.Type_Info_Rune: - context.allocator = context.temp_allocator - text := err_conv(_decode_text(d, add)) or_return - defer delete(text) + text := err_conv(_decode_text(d, add, allocator=context.temp_allocator)) or_return + defer delete(text, context.temp_allocator) r := (^rune)(v.data) dr, n := utf8.decode_rune(text) @@ -585,7 +582,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header case quaternion64: info = type_info_of(f16) case quaternion128: info = type_info_of(f32) case quaternion256: info = type_info_of(f64) - case: unreachable() + case: unreachable() } out_of_space := assign_array(d, &da, info, 4, growable=false) or_return @@ -598,15 +595,15 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, add: Add) -> (err: Unmarshal_Error) { r := d.reader - decode_key :: proc(d: Decoder, v: any) -> (k: string, err: Unmarshal_Error) { + decode_key :: proc(d: Decoder, v: any, allocator := context.allocator) -> (k: string, err: Unmarshal_Error) { entry_hdr := _decode_header(d.reader) or_return entry_maj, entry_add := _header_split(entry_hdr) #partial switch entry_maj { case .Text: - k = err_conv(_decode_text(d, entry_add)) or_return + k = err_conv(_decode_text(d, entry_add, allocator)) or_return return case .Bytes: - bytes := err_conv(_decode_bytes(d, entry_add)) 
or_return + bytes := err_conv(_decode_bytes(d, entry_add, allocator=allocator)) or_return k = string(bytes) return case: @@ -637,16 +634,14 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, for idx := 0; idx < len(fields) && (unknown || idx < length); idx += 1 { // Decode key, keys can only be strings. - key: string; { - context.allocator = context.temp_allocator - if keyv, kerr := decode_key(d, v); unknown && kerr == .Break { - break - } else if kerr != nil { - err = kerr - return - } else { - key = keyv - } + key: string + if keyv, kerr := decode_key(d, v, context.temp_allocator); unknown && kerr == .Break { + break + } else if kerr != nil { + err = kerr + return + } else { + key = keyv } defer delete(key, context.temp_allocator) @@ -779,8 +774,7 @@ _unmarshal_union :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header return .Bad_Tag_Value } - context.allocator = context.temp_allocator - target_name = err_conv(_decode_text(d, idadd)) or_return + target_name = err_conv(_decode_text(d, idadd, context.temp_allocator)) or_return } defer delete(target_name, context.temp_allocator) From 154e0d41c6f77feb8a11ff8a6cb4449c11dd767e Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 18:11:52 +0100 Subject: [PATCH 11/66] encoding/cbor: fix wrong allocator bug --- core/encoding/cbor/coding.odin | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 32ecf52bc..ee928f68e 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -100,10 +100,9 @@ decode :: decode_from // Decodes the given string as CBOR. // See docs on the proc group `decode` for more information. 
decode_from_string :: proc(s: string, flags: Decoder_Flags = {}, allocator := context.allocator) -> (v: Value, err: Decode_Error) { - context.allocator = allocator r: strings.Reader strings.reader_init(&r, s) - return decode_from_reader(strings.reader_to_stream(&r), flags) + return decode_from_reader(strings.reader_to_stream(&r), flags, allocator) } // Reads a CBOR value from the given reader. @@ -489,7 +488,7 @@ _decode_text_ptr :: proc(d: Decoder, add: Add) -> (v: ^Text, err: Decode_Error) return } -_decode_text :: proc(d: Decoder, add: Add, allocator := context.temp_allocator) -> (v: Text, err: Decode_Error) { +_decode_text :: proc(d: Decoder, add: Add, allocator := context.allocator) -> (v: Text, err: Decode_Error) { return (Text)(_decode_bytes(d, add, .Text, allocator) or_return), nil } From 72d5b87b52fd4a1fb92819121e7f17b9118dac99 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 18:12:13 +0100 Subject: [PATCH 12/66] encoding/cbor: clean --- core/encoding/cbor/coding.odin | 5 ++--- tests/core/encoding/cbor/test_core_cbor.odin | 23 ++++++-------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index ee928f68e..9dd6d2639 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -316,9 +316,8 @@ _encode_progress_end :: proc(e: ^Encoder, is_begin: bool, tmp: runtime.Arena_Tem } _decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { - buf: [1]byte = --- - io.read_full(r, buf[:]) or_return - return Header(buf[0]), nil + hdr = Header(_decode_u8(r) or_return) + return } _header_split :: proc(hdr: Header) -> (Major, Add) { diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 23bfbd3d8..0fb8b521f 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -799,15 +799,8 @@ test_encode_tags :: proc(t: ^testing.T) { // 
Helpers -buf: bytes.Buffer -stream := bytes.buffer_to_stream(&buf) -encoder := cbor.Encoder{cbor.ENCODE_FULLY_DETERMINISTIC, stream} - expect_decoding :: proc(t: ^testing.T, encoded: string, decoded: string, type: typeid, loc := #caller_location) { - bytes.buffer_reset(&buf) - bytes.buffer_write_string(&buf, encoded) - - res, err := cbor.decode(stream) + res, err := cbor.decode(encoded) defer cbor.destroy(res) expect_value(t, reflect.union_variant_typeid(res), type, loc) @@ -820,10 +813,7 @@ expect_decoding :: proc(t: ^testing.T, encoded: string, decoded: string, type: t } expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_decoded: string, loc := #caller_location) { - bytes.buffer_reset(&buf) - bytes.buffer_write_string(&buf, encoded) - - res, err := cbor.decode(stream) + res, err := cbor.decode(encoded) defer cbor.destroy(res) expect_value(t, err, nil, loc) @@ -841,10 +831,7 @@ expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_de } expect_float :: proc(t: ^testing.T, encoded: string, expected: $T, loc := #caller_location) where intrinsics.type_is_float(T) { - bytes.buffer_reset(&buf) - bytes.buffer_write_string(&buf, encoded) - - res, err := cbor.decode(stream) + res, err := cbor.decode(encoded) defer cbor.destroy(res) expect_value(t, reflect.union_variant_typeid(res), typeid_of(T), loc) @@ -862,6 +849,10 @@ expect_float :: proc(t: ^testing.T, encoded: string, expected: $T, loc := #calle } } +buf: bytes.Buffer +stream := bytes.buffer_to_stream(&buf) +encoder := cbor.Encoder{cbor.ENCODE_FULLY_DETERMINISTIC, stream} + expect_encoding :: proc(t: ^testing.T, val: cbor.Value, encoded: string, loc := #caller_location) { bytes.buffer_reset(&buf) From 7854aa22d99b2c0340f4352f133ce06fd1b80df6 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 18:40:27 +0100 Subject: [PATCH 13/66] encoding/cbor: fix unused import --- tests/core/encoding/hex/test_core_hex.odin | 1 - 1 file changed, 1 deletion(-) diff 
--git a/tests/core/encoding/hex/test_core_hex.odin b/tests/core/encoding/hex/test_core_hex.odin index a5daa206e..d928cd28e 100644 --- a/tests/core/encoding/hex/test_core_hex.odin +++ b/tests/core/encoding/hex/test_core_hex.odin @@ -4,7 +4,6 @@ import "core:encoding/hex" import "core:testing" import "core:fmt" import "core:os" -import "core:bytes" TEST_count := 0 TEST_fail := 0 From 759d095548e7135bbfeb68ac6b0a21857af49527 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 18:52:53 +0100 Subject: [PATCH 14/66] encoding/cbor: ignore struct fields with `cbor:"-"` --- core/encoding/cbor/marshal.odin | 37 ++++++++++++++------ core/encoding/cbor/unmarshal.odin | 4 +++ tests/core/encoding/cbor/test_core_cbor.odin | 3 ++ 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index 898371adf..deb7ba020 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -422,7 +422,13 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { case Tag: return err_conv(_encode_tag(e, vv)) } - err_conv(_encode_u16(e, u16(len(info.names)), .Map)) or_return + field_name :: #force_inline proc(info: runtime.Type_Info_Struct, i: int) -> string { + if cbor_name := string(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "cbor")); cbor_name != "" { + return cbor_name + } else { + return info.names[i] + } + } marshal_entry :: #force_inline proc(e: Encoder, info: runtime.Type_Info_Struct, v: any, name: string, i: int) -> Marshal_Error { err_conv(_encode_text(e, name)) or_return @@ -448,13 +454,14 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { return marshal_into(e, field_any) } - - field_name :: #force_inline proc(info: runtime.Type_Info_Struct, i: int) -> string { - if cbor_name := string(reflect.struct_tag_get(reflect.Struct_Tag(info.tags[i]), "cbor")); cbor_name != "" { - return cbor_name - } else { - return 
info.names[i] + + n: u64; { + for _, i in info.names { + if field_name(info, i) != "-" { + n += 1 + } } + err_conv(_encode_u64(e, n, .Map)) or_return } if .Deterministic_Map_Sorting in e.flags { @@ -462,11 +469,16 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { name: string, field: int, } - entries := make([dynamic]Name, 0, len(info.names), context.temp_allocator) or_return + entries := make([dynamic]Name, 0, n, context.temp_allocator) or_return defer delete(entries) for name, i in info.names { - append(&entries, Name{field_name(info, i), i}) or_return + fname := field_name(info, i) + if fname == "-" { + continue + } + + append(&entries, Name{fname, i}) or_return } // Sort lexicographic on the bytes of the key. @@ -479,7 +491,12 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { } } else { for name, i in info.names { - marshal_entry(e, info, v, field_name(info, i), i) or_return + fname := field_name(info, i) + if fname == "-" { + continue + } + + marshal_entry(e, info, v, fname, i) or_return } } return diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index ae7f97c98..9ad25a38d 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -650,6 +650,10 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, { for field, field_idx in fields { tag_value := string(reflect.struct_tag_get(field.tag, "cbor")) + if tag_value == "-" { + continue + } + if key == tag_value { use_field_idx = field_idx break diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 0fb8b521f..daf31c277 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -130,6 +130,7 @@ Foo :: struct { small_onetwenty: i128, biggest: big.Int, smallest: big.Int, + ignore_this: ^Foo `cbor:"-"`, } FooBar :: enum { @@ -189,6 +190,7 @@ test_marshalling :: 
proc(t: ^testing.T) { smallie = cbor.Negative_U64(max(u64)), onetwenty = i128(12345), small_onetwenty = -i128(max(u64)), + ignore_this = &Foo{}, } big.atoi(&f.biggest, "1234567891011121314151617181920") @@ -343,6 +345,7 @@ test_marshalling :: proc(t: ^testing.T) { ev(t, backf.smallie, f.smallie) ev(t, backf.onetwenty, f.onetwenty) ev(t, backf.small_onetwenty, f.small_onetwenty) + ev(t, backf.ignore_this, nil) s_equals, s_err := big.equals(&backf.smallest, &f.smallest) ev(t, s_err, nil) From 317931a3c5179e10db941157a994c8e89b7080c2 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 19:22:33 +0100 Subject: [PATCH 15/66] encoding/cbor: deterministically store bit sets as big endian --- core/encoding/cbor/cbor.odin | 18 ----------------- core/encoding/cbor/marshal.odin | 3 ++- core/encoding/cbor/unmarshal.odin | 13 ++++++------ core/reflect/reflect.odin | 21 ++++++++++++++++++++ tests/core/encoding/cbor/test_core_cbor.odin | 2 +- 5 files changed, 30 insertions(+), 27 deletions(-) diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 9df4dfa51..3ab493b4b 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -675,21 +675,3 @@ _i128_to_uint :: proc(v: i128) -> (u: u64, m: Major, err: Encode_Data_Error) { u, err = _u128_to_u64(u128(v)) return } - -@(private) -is_bit_set_different_endian_to_platform :: proc(ti: ^runtime.Type_Info) -> bool { - if ti == nil { - return false - } - t := runtime.type_info_base(ti) - #partial switch info in t.variant { - case runtime.Type_Info_Integer: - switch info.endianness { - case .Platform: return false - case .Little: return ODIN_ENDIAN != .Little - case .Big: return ODIN_ENDIAN != .Big - } - } - return false -} - diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index deb7ba020..b7c47f252 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -541,7 +541,8 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> 
(err: Marshal_Error) { return marshal_into(e, any{v.data, info.base.id}) case runtime.Type_Info_Bit_Set: - do_byte_swap := is_bit_set_different_endian_to_platform(info.underlying) + // Store bit_set as big endian just like the protocol. + do_byte_swap := !reflect.bit_set_is_big_endian(v) switch ti.size * 8 { case 0: return _encode_u8(e.writer, 0) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 9ad25a38d..98ef06635 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -856,12 +856,11 @@ _assign_int :: proc(val: any, i: $T) -> bool { case uintptr: dst = uintptr(i) case: ti := type_info_of(v.id) - do_byte_swap := is_bit_set_different_endian_to_platform(ti) - #partial switch info in ti.variant { - case runtime.Type_Info_Bit_Set: + if _, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok { + do_byte_swap := !reflect.bit_set_is_big_endian(v) switch ti.size * 8 { - case 0: - case 8: + case 0: // no-op. + case 8: x := (^u8)(v.data) x^ = u8(i) case 16: @@ -876,9 +875,9 @@ _assign_int :: proc(val: any, i: $T) -> bool { case: panic("unknown bit_size size") } - case: - return false + return true } + return false } return true } diff --git a/core/reflect/reflect.odin b/core/reflect/reflect.odin index de5dec2e3..de7379ecc 100644 --- a/core/reflect/reflect.odin +++ b/core/reflect/reflect.odin @@ -934,6 +934,27 @@ set_union_value :: proc(dst: any, value: any) -> bool { panic("expected a union to reflect.set_union_variant_typeid") } +@(require_results) +bit_set_is_big_endian :: proc(value: any, loc := #caller_location) -> bool { + if value == nil { return ODIN_ENDIAN == .Big } + + ti := runtime.type_info_base(type_info_of(value.id)) + if info, ok := ti.variant.(runtime.Type_Info_Bit_Set); ok { + if info.underlying == nil { return ODIN_ENDIAN == .Big } + + underlying_ti := runtime.type_info_base(info.underlying) + if underlying_info, uok := underlying_ti.variant.(runtime.Type_Info_Integer); uok { + switch 
underlying_info.endianness { + case .Platform: return ODIN_ENDIAN == .Big + case .Little: return false + case .Big: return true + } + } + + return ODIN_ENDIAN == .Big + } + panic("expected a bit_set to reflect.bit_set_is_big_endian", loc) +} @(require_results) diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index daf31c277..691a0a5ec 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -251,7 +251,7 @@ test_marshalling :: proc(t: ^testing.T) { ], "cstr": "Hellnope", "ennie": 0, - "ennieb": 2, + "ennieb": 512, "iamint": -256, "important": "!", "my_bytes": h'', From c1cf6c1a95bb489525e329280be735d7a5ce966b Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Sat, 23 Dec 2023 20:02:04 +0100 Subject: [PATCH 16/66] encoding/cbor: add general docs and example --- core/encoding/cbor/cbor.odin | 5 -- core/encoding/cbor/coding.odin | 8 +- core/encoding/cbor/doc.odin | 143 ++++++++++++++++++++++++++++++ core/encoding/cbor/marshal.odin | 2 +- core/encoding/cbor/unmarshal.odin | 2 +- 5 files changed, 149 insertions(+), 11 deletions(-) create mode 100644 core/encoding/cbor/doc.odin diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 3ab493b4b..7e0f4ea1a 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -1,14 +1,9 @@ -// Package cbor encodes, decodes, marshals and unmarshals types from/into RCF 8949 compatible CBOR binary. -// Also provided are conversion to and from JSON and the CBOR diagnostic format. -// -// You can additionally provide custom CBOR tag implementations for your use cases. 
package cbor import "core:encoding/json" import "core:intrinsics" import "core:io" import "core:mem" -import "core:runtime" import "core:strconv" import "core:strings" diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 9dd6d2639..a9bb6e408 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -121,7 +121,7 @@ decode_from_decoder :: proc(d: Decoder, allocator := context.allocator) -> (v: V d := d - DECODE_PROGRESS_GUARD(&d) + _DECODE_PROGRESS_GUARD(&d) v, err = _decode_from_decoder(d) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. @@ -228,7 +228,7 @@ encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Enc encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { e := e - ENCODE_PROGRESS_GUARD(&e) or_return + _ENCODE_PROGRESS_GUARD(&e) or_return switch v_spec in v { case u8: return _encode_u8(e.writer, v_spec, .Unsigned) @@ -256,7 +256,7 @@ encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { } @(deferred_in_out=_decode_progress_end) -DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) { +_DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) { if ._In_Progress in d.flags { return } @@ -286,7 +286,7 @@ _decode_progress_end :: proc(d: ^Decoder, is_begin: bool, tmp: runtime.Arena_Tem } @(deferred_in_out=_encode_progress_end) -ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { +_ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { if ._In_Progress in e.flags { return } diff --git a/core/encoding/cbor/doc.odin b/core/encoding/cbor/doc.odin new file mode 100644 index 000000000..efcad5c9e --- /dev/null +++ b/core/encoding/cbor/doc.odin @@ -0,0 +1,143 @@ +/* +Package cbor encodes, decodes, marshals and unmarshals types from/into RFC
8949 compatible CBOR binary. +Also provided are conversion to and from JSON and the CBOR diagnostic format. + +**Allocations:** + +In general, when in the following table it says allocations are done on the `context.temp_allocator`, these allocations +are still attempted to be deallocated. +This allows you to use an allocator with freeing implemented as the `context.temp_allocator` which is handy with big CBOR. + +If you use the default `context.temp_allocator` it will be returned back to its state when the process (en/decoding, (un)marshal) started. + +- *Encoding*: If the `.Deterministic_Map_Sorting` flag is set on the encoder, this allocates on `context.temp_allocator` + some space for the keys of maps in order to sort them and then write them. + Other than that there are no allocations (only for the final bytes if you use `cbor.encode_into_bytes`. + +- *Decoding*: Allocates everything on the given allocator and input given can be deleted after decoding. + *No* allocations are done on the `context.temp_allocator`. + +- *Marshal*: Same allocation strategy as encoding. + +- *Unmarshal*: Allocates everything on the given allocator and input given can be deleted after unmarshalling. + Some temporary allocations are done on the `context.temp_allocator`. + +**Determinism:** + +CBOR defines a deterministic en/decoder, which among other things uses the smallest type possible for integers and floats, +and sorts map keys by their (encoded) lexical bytewise order. + +You can enable this behaviour using a combination of flags, also available as the `cbor.ENCODE_FULLY_DETERMINISTIC` constant. +If you just want the small size that comes with this, but not the map sorting (which has a performance cost) you can use the +`cbor.ENCODE_SMALL` constant for the flags. + +A deterministic float is a float in the smallest type (f16, f32, f64) that hasn't changed after conversion. +A deterministic integer is an integer in the smallest representation (u8, u16, u32, u64) it fits in. 
+ +**Untrusted Input:** + +By default input is treated as untrusted; this means the sizes that are encoded in the CBOR are not blindly trusted. +If you were to trust these sizes and allocate space for them, an attacker would be able to cause massive allocations with small payloads. + +The decoder has a `max_pre_alloc` field that specifies the maximum amount of bytes (roughly) to pre-allocate, a KiB by default. + +This does mean reallocations are more common, though; if you know the input is trusted, you can add the `.Trusted_Input` flag to the decoder. + +**Tags:** + +CBOR describes tags that you can wrap values with to assign a number to describe what type of data will follow. + +More information and a list of default tags can be found here: [[RFC 8949 Section 3.4;https://www.rfc-editor.org/rfc/rfc8949.html#name-tagging-of-items]]. + +A list of registered extension types can be found here: [[IANA CBOR assignments;https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml]]. + +Tags can either be assigned to a distinct Odin type (used by default), +or be used with struct tags (`cbor_tag:"base64"`, or `cbor_tag:"1"` for example). + +By default, the following tags are supported/provided by this implementation: + +- *1/epoch*: Assign this tag to `time.Time` or integer fields to use the defined seconds since epoch format. + +- *24/cbor*: Assign this tag to string or byte fields to store encoded CBOR (not decoding it). + +- *34/base64*: Assign this tag to string or byte fields to store and decode the contents in base64. + +- *2 & 3*: Used automatically by the implementation to encode and decode big numbers into/from `core:math/big`. + +- *55799*: Self-described CBOR, used when `.Self_Described_CBOR` flag is used to wrap the entire binary. + This shows other implementations that we are dealing with CBOR by just looking at the first bytes of input.
+ +- *1010*: An extension tag that defines a string type followed by its value, this is used by this implementation to support Odin's unions. + +Users can provide their own tag implementations using the `cbor.tag_register_type(...)` to register a tag for a distinct Odin type +used automatically when it is encountered during marshal and unmarshal. +Or with `cbor.tag_register_number(...)` to register a tag number along with an identifier for convenience that can be used with struct tags, +e.g. `cbor_tag:"69"` or `cbor_tag:"my_tag"`. + +You can look at the default tags provided for pointers on how these implementations work. + +Example: + package main + + import "core:encoding/cbor" + import "core:fmt" + import "core:time" + + Possibilities :: union { + string, + int, + } + + Data :: struct { + str: string, + neg: cbor.Negative_U16, // Store a CBOR value directly. + now: time.Time `cbor_tag:"epoch"`, // Wrapped in the epoch tag. + ignore_this: ^Data `cbor:"-"`, // Ignored by implementation. + renamed: f32 `cbor:"renamed :)"`, // Renamed when encoded. + my_union: Possibilities, // Union support. + } + + main :: proc() { + now := time.Time{_nsec = 1701117968 * 1e9} + + data := Data{ + str = "Hello, World!", + neg = 300, + now = now, + ignore_this = &Data{}, + renamed = 123123.125, + my_union = 3, + } + + // Marshal the struct into binary CBOR. + binary, err := cbor.marshal(data, cbor.ENCODE_FULLY_DETERMINISTIC) + assert(err == nil) + defer delete(binary) + + // Decode the binary data into a `cbor.Value`. + decoded, derr := cbor.decode(string(binary)) + assert(derr == nil) + defer cbor.destroy(decoded) + + // Turn the CBOR into a human readable representation. + diagnosis, eerr := cbor.diagnose(decoded) + assert(eerr == nil) + defer delete(diagnosis) + + fmt.println(diagnosis) + } + +Output: + { + "my_union": 1010([ + "int", + 3 + ]), + "neg": -301, + "now": 1(1701117968), + "renamed :)": 123123.12500000, + "str": "Hello, World!" 
+ } +*/ +package cbor + diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index b7c47f252..4a0619c04 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -78,7 +78,7 @@ marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Mars marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { e := e - err_conv(ENCODE_PROGRESS_GUARD(&e)) or_return + err_conv(_ENCODE_PROGRESS_GUARD(&e)) or_return if v == nil { return _encode_nil(e.writer) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 98ef06635..0acb48083 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -53,7 +53,7 @@ unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, all unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) { d := d - DECODE_PROGRESS_GUARD(&d) + _DECODE_PROGRESS_GUARD(&d) err = _unmarshal_any_ptr(d, ptr, allocator=allocator) From c4e45d509a25ad1d341a5519606ddff59bfeb64e Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Wed, 17 Jan 2024 00:03:35 +0100 Subject: [PATCH 17/66] encoding/cbor: adhere to new quaternion rules of master --- core/encoding/cbor/unmarshal.odin | 6 +++--- tests/core/encoding/cbor/test_core_cbor.odin | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 0acb48083..eec999c12 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -903,9 +903,9 @@ _assign_float :: proc(val: any, f: $T) -> bool { case complex64: dst = complex(f32(f), 0) case complex128: dst = complex(f64(f), 0) - case quaternion64: dst = quaternion(f16(f), 0, 0, 0) - case quaternion128: dst = quaternion(f32(f), 0, 0, 0) - case quaternion256: dst = quaternion(f64(f), 0, 0, 0) + case quaternion64: dst = quaternion(w=f16(f), x=0, y=0, z=0) + 
case quaternion128: dst = quaternion(w=f32(f), x=0, y=0, z=0) + case quaternion256: dst = quaternion(w=f64(f), x=0, y=0, z=0) case: return false } diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 691a0a5ec..e7a3ef419 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -177,7 +177,7 @@ test_marshalling :: proc(t: ^testing.T) { ennie = .EFoo, ennieb = {.EBar}, - quat = quaternion(16, 17, 18, 19), + quat = quaternion(w=16, x=17, y=18, z=19), comp = complex(32, 33), important = '!', From a664d9804f64f7f9d6cb4a8bbe2e618297663c60 Mon Sep 17 00:00:00 2001 From: Laytan Date: Tue, 6 Feb 2024 19:17:07 +0100 Subject: [PATCH 18/66] encoding/cbor: remove usage of incl_elem and excl_elem --- core/encoding/cbor/coding.odin | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index a9bb6e408..a5f21af1f 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -262,7 +262,7 @@ _DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Are } is_begin = true - incl_elem(&d.flags, Decoder_Flag._In_Progress) + d.flags |= { ._In_Progress } if context.allocator != context.temp_allocator { tmp = runtime.default_temp_allocator_temp_begin() @@ -280,7 +280,7 @@ _decode_progress_end :: proc(d: ^Decoder, is_begin: bool, tmp: runtime.Arena_Tem return } - excl_elem(&d.flags, Decoder_Flag._In_Progress) + d.flags &~= { ._In_Progress } runtime.default_temp_allocator_temp_end(tmp) } @@ -292,7 +292,7 @@ _ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Are } is_begin = true - incl_elem(&e.flags, Encoder_Flag._In_Progress) + e.flags |= { ._In_Progress } if context.allocator != context.temp_allocator { tmp = runtime.default_temp_allocator_temp_begin() @@ -310,7 +310,7 @@ _encode_progress_end :: proc(e: ^Encoder, is_begin: bool, 
tmp: runtime.Arena_Tem return } - excl_elem(&e.flags, Encoder_Flag._In_Progress) + e.flags &~= { ._In_Progress } runtime.default_temp_allocator_temp_end(tmp) } From 0076c07076783e5256a501e9dc37a803757ea577 Mon Sep 17 00:00:00 2001 From: Laytan Date: Tue, 6 Feb 2024 19:20:18 +0100 Subject: [PATCH 19/66] encoding/cbor: core -> base --- core/encoding/cbor/cbor.odin | 3 ++- core/encoding/cbor/coding.odin | 5 +++-- core/encoding/cbor/marshal.odin | 5 +++-- core/encoding/cbor/tags.odin | 3 ++- core/encoding/cbor/unmarshal.odin | 5 +++-- tests/core/encoding/base64/base64.odin | 3 ++- tests/core/encoding/cbor/test_core_cbor.odin | 3 ++- 7 files changed, 17 insertions(+), 10 deletions(-) diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 7e0f4ea1a..f879a11aa 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -1,7 +1,8 @@ package cbor +import "base:intrinsics" + import "core:encoding/json" -import "core:intrinsics" import "core:io" import "core:mem" import "core:strconv" diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index a5f21af1f..5719078c7 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -1,10 +1,11 @@ package cbor +import "base:intrinsics" +import "base:runtime" + import "core:bytes" import "core:encoding/endian" -import "core:intrinsics" import "core:io" -import "core:runtime" import "core:slice" import "core:strings" diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index 4a0619c04..7d93088cb 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -1,11 +1,12 @@ package cbor +import "base:intrinsics" +import "base:runtime" + import "core:bytes" -import "core:intrinsics" import "core:io" import "core:mem" import "core:reflect" -import "core:runtime" import "core:slice" import "core:strconv" import "core:strings" diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 
efe724f8c..c9ddaed56 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -1,12 +1,13 @@ package cbor +import "base:runtime" + import "core:encoding/base64" import "core:io" import "core:math" import "core:math/big" import "core:mem" import "core:reflect" -import "core:runtime" import "core:strings" import "core:time" diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index eec999c12..eef5d3d99 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -1,10 +1,11 @@ package cbor -import "core:intrinsics" +import "base:intrinsics" +import "base:runtime" + import "core:io" import "core:mem" import "core:reflect" -import "core:runtime" import "core:strings" import "core:unicode/utf8" diff --git a/tests/core/encoding/base64/base64.odin b/tests/core/encoding/base64/base64.odin index 41dbba683..e48eea020 100644 --- a/tests/core/encoding/base64/base64.odin +++ b/tests/core/encoding/base64/base64.odin @@ -1,8 +1,9 @@ package test_encoding_base64 +import "base:intrinsics" + import "core:encoding/base64" import "core:fmt" -import "core:intrinsics" import "core:os" import "core:reflect" import "core:testing" diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index e7a3ef419..8262e5da4 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -1,9 +1,10 @@ package test_encoding_cbor +import "base:intrinsics" + import "core:bytes" import "core:encoding/cbor" import "core:fmt" -import "core:intrinsics" import "core:io" import "core:math/big" import "core:mem" From b11d839fb6dab106a557cf65257e31644a84725d Mon Sep 17 00:00:00 2001 From: Laytan Date: Tue, 6 Feb 2024 20:13:30 +0100 Subject: [PATCH 20/66] encoding/cbor: make temp allocations more explicit --- core/encoding/cbor/coding.odin | 108 +++++-------------- core/encoding/cbor/doc.odin | 12 +-- core/encoding/cbor/marshal.odin | 37 
++++--- core/encoding/cbor/unmarshal.odin | 19 ++-- tests/core/encoding/cbor/test_core_cbor.odin | 2 +- 5 files changed, 63 insertions(+), 115 deletions(-) diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 5719078c7..abb832ccf 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -26,9 +26,6 @@ Encoder_Flag :: enum { // NOTE: In order to do this, all keys of a map have to be pre-computed, sorted, and // then written, this involves temporary allocations for the keys and a copy of the map itself. Deterministic_Map_Sorting, - - // Internal flag to do initialization. - _In_Progress, } Encoder_Flags :: bit_set[Encoder_Flag] @@ -40,8 +37,9 @@ ENCODE_FULLY_DETERMINISTIC :: Encoder_Flags{.Deterministic_Int_Size, .Determinis ENCODE_SMALL :: Encoder_Flags{.Deterministic_Int_Size, .Deterministic_Float_Size} Encoder :: struct { - flags: Encoder_Flags, - writer: io.Writer, + flags: Encoder_Flags, + writer: io.Writer, + temp_allocator: runtime.Allocator, } Decoder_Flag :: enum { @@ -56,9 +54,6 @@ Decoder_Flag :: enum { // Makes the decoder shrink of excess capacity from allocated buffers/containers before returning. Shrink_Excess, - - // Internal flag to do initialization. - _In_Progress, } Decoder_Flags :: bit_set[Decoder_Flag] @@ -122,7 +117,9 @@ decode_from_decoder :: proc(d: Decoder, allocator := context.allocator) -> (v: V d := d - _DECODE_PROGRESS_GUARD(&d) + if d.max_pre_alloc <= 0 { + d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC + } v, err = _decode_from_decoder(d) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. @@ -191,7 +188,7 @@ have to be precomputed, sorted and only then written to the output. Empty flags will do nothing extra to the value. -The allocations for the `.Deterministic_Map_Sorting` flag are done using the `context.temp_allocator` +The allocations for the `.Deterministic_Map_Sorting` flag are done using the given temp_allocator,
but are followed by the necessary `delete` and `free` calls if the allocator supports them. This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end. @@ -206,22 +203,22 @@ encode :: encode_into // Encodes the CBOR value into binary CBOR allocated on the given allocator. // See the docs on the proc group `encode_into` for more info. -encode_into_bytes :: proc(v: Value, flags := ENCODE_SMALL, allocator := context.allocator) -> (data: []byte, err: Encode_Error) { +encode_into_bytes :: proc(v: Value, flags := ENCODE_SMALL, allocator := context.allocator, temp_allocator := context.temp_allocator) -> (data: []byte, err: Encode_Error) { b := strings.builder_make(allocator) or_return - encode_into_builder(&b, v, flags) or_return + encode_into_builder(&b, v, flags, temp_allocator) or_return return b.buf[:], nil } // Encodes the CBOR value into binary CBOR written to the given builder. // See the docs on the proc group `encode_into` for more info. -encode_into_builder :: proc(b: ^strings.Builder, v: Value, flags := ENCODE_SMALL) -> Encode_Error { - return encode_into_writer(strings.to_stream(b), v, flags) +encode_into_builder :: proc(b: ^strings.Builder, v: Value, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator) -> Encode_Error { + return encode_into_writer(strings.to_stream(b), v, flags, temp_allocator) } // Encodes the CBOR value into binary CBOR written to the given writer. // See the docs on the proc group `encode_into` for more info. -encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Encode_Error { - return encode_into_encoder(Encoder{flags, w}, v) +encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator) -> Encode_Error { + return encode_into_encoder(Encoder{flags, w, temp_allocator}, v) } // Encodes the CBOR value into binary CBOR written to the given encoder. 
@@ -229,8 +226,15 @@ encode_into_writer :: proc(w: io.Writer, v: Value, flags := ENCODE_SMALL) -> Enc encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { e := e - _ENCODE_PROGRESS_GUARD(&e) or_return - + if e.temp_allocator.procedure == nil { + e.temp_allocator = context.temp_allocator + } + + if .Self_Described_CBOR in e.flags { + _encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return + e.flags &~= { .Self_Described_CBOR } + } + switch v_spec in v { case u8: return _encode_u8(e.writer, v_spec, .Unsigned) case u16: return _encode_u16(e, v_spec, .Unsigned) @@ -256,66 +260,6 @@ encode_into_encoder :: proc(e: Encoder, v: Value) -> Encode_Error { } } -@(deferred_in_out=_decode_progress_end) -_DECODE_PROGRESS_GUARD :: proc(d: ^Decoder) -> (is_begin: bool, tmp: runtime.Arena_Temp) { - if ._In_Progress in d.flags { - return - } - is_begin = true - - d.flags |= { ._In_Progress } - - if context.allocator != context.temp_allocator { - tmp = runtime.default_temp_allocator_temp_begin() - } - - if d.max_pre_alloc <= 0 { - d.max_pre_alloc = DEFAULT_MAX_PRE_ALLOC - } - - return -} - -_decode_progress_end :: proc(d: ^Decoder, is_begin: bool, tmp: runtime.Arena_Temp) { - if !is_begin { - return - } - - d.flags &~= { ._In_Progress } - - runtime.default_temp_allocator_temp_end(tmp) -} - -@(deferred_in_out=_encode_progress_end) -_ENCODE_PROGRESS_GUARD :: proc(e: ^Encoder) -> (is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { - if ._In_Progress in e.flags { - return - } - is_begin = true - - e.flags |= { ._In_Progress } - - if context.allocator != context.temp_allocator { - tmp = runtime.default_temp_allocator_temp_begin() - } - - if .Self_Described_CBOR in e.flags { - _encode_u64(e^, TAG_SELF_DESCRIBED_CBOR, .Tag) or_return - } - - return -} - -_encode_progress_end :: proc(e: ^Encoder, is_begin: bool, tmp: runtime.Arena_Temp, err: Encode_Error) { - if !is_begin || err != nil { - return - } - - e.flags &~= { ._In_Progress } - - 
runtime.default_temp_allocator_temp_end(tmp) -} - _decode_header :: proc(r: io.Reader) -> (hdr: Header, err: io.Error) { hdr = Header(_decode_u8(r) or_return) return @@ -602,13 +546,13 @@ _encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) { entry: Map_Entry, } - entries := make([]Map_Entry_With_Key, len(m), context.temp_allocator) or_return - defer delete(entries, context.temp_allocator) + entries := make([]Map_Entry_With_Key, len(m), e.temp_allocator) or_return + defer delete(entries, e.temp_allocator) for &entry, i in entries { entry.entry = m[i] - buf := strings.builder_make(context.temp_allocator) or_return + buf := strings.builder_make(e.temp_allocator) or_return ke := e ke.writer = strings.to_stream(&buf) @@ -624,7 +568,7 @@ _encode_map :: proc(e: Encoder, m: Map) -> (err: Encode_Error) { for entry in entries { io.write_full(e.writer, entry.encoded_key) or_return - delete(entry.encoded_key, context.temp_allocator) + delete(entry.encoded_key, e.temp_allocator) encode(e, entry.entry.value) or_return } diff --git a/core/encoding/cbor/doc.odin b/core/encoding/cbor/doc.odin index efcad5c9e..ee8ba23a0 100644 --- a/core/encoding/cbor/doc.odin +++ b/core/encoding/cbor/doc.odin @@ -4,23 +4,21 @@ Also provided are conversion to and from JSON and the CBOR diagnostic format. **Allocations:** -In general, when in the following table it says allocations are done on the `context.temp_allocator`, these allocations +In general, when in the following table it says allocations are done on the `temp_allocator`, these allocations are still attempted to be deallocated. -This allows you to use an allocator with freeing implemented as the `context.temp_allocator` which is handy with big CBOR. +This allows you to use an allocator with freeing implemented as the `temp_allocator` which is handy with big CBOR. -If you use the default `context.temp_allocator` it will be returned back to its state when the process (en/decoding, (un)marshal) started. 
- +- *Encoding*: If the `.Deterministic_Map_Sorting` flag is set on the encoder, this allocates on the given `temp_allocator` some space for the keys of maps in order to sort them and then write them. Other than that there are no allocations (only for the final bytes if you use `cbor.encode_into_bytes`. - *Decoding*: Allocates everything on the given allocator and input given can be deleted after decoding. - *No* allocations are done on the `context.temp_allocator`. + *No* temporary allocations are done. - *Marshal*: Same allocation strategy as encoding. - *Unmarshal*: Allocates everything on the given allocator and input given can be deleted after unmarshalling. - Some temporary allocations are done on the `context.temp_allocator`. + Some temporary allocations are done on the given `temp_allocator`. **Determinism:** diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index 7d93088cb..2ffb6b5b4 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -29,7 +29,7 @@ have to be precomputed, sorted and only then written to the output. Empty flags will do nothing extra to the value. -The allocations for the `.Deterministic_Map_Sorting` flag are done using the `context.temp_allocator` +The allocations for the `.Deterministic_Map_Sorting` flag are done using the given `temp_allocator`, but are followed by the necessary `delete` and `free` calls if the allocator supports them. This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end. @@ -45,7 +45,7 @@ marshal :: marshal_into // Marshals the given value into a CBOR byte stream (allocated using the given allocator). // See docs on the `marshal_into` proc group for more info.
-marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.allocator) -> (bytes: []byte, err: Marshal_Error) { +marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.allocator, temp_allocator := context.temp_allocator) -> (bytes: []byte, err: Marshal_Error) { b, alloc_err := strings.builder_make(allocator) // The builder as a stream also returns .EOF if it ran out of memory so this is consistent. if alloc_err != nil { @@ -54,7 +54,7 @@ marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.a defer if err != nil { strings.builder_destroy(&b) } - if err = marshal_into_builder(&b, v, flags); err != nil { + if err = marshal_into_builder(&b, v, flags, temp_allocator); err != nil { return } @@ -63,14 +63,14 @@ marshal_into_bytes :: proc(v: any, flags := ENCODE_SMALL, allocator := context.a // Marshals the given value into a CBOR byte stream written to the given builder. // See docs on the `marshal_into` proc group for more info. -marshal_into_builder :: proc(b: ^strings.Builder, v: any, flags := ENCODE_SMALL) -> Marshal_Error { - return marshal_into_writer(strings.to_writer(b), v, flags) +marshal_into_builder :: proc(b: ^strings.Builder, v: any, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator) -> Marshal_Error { + return marshal_into_writer(strings.to_writer(b), v, flags, temp_allocator) } // Marshals the given value into a CBOR byte stream written to the given writer. // See docs on the `marshal_into` proc group for more info. 
-marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Marshal_Error { - encoder := Encoder{flags, w} +marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL, temp_allocator := context.temp_allocator) -> Marshal_Error { + encoder := Encoder{flags, w, temp_allocator} return marshal_into_encoder(encoder, v) } @@ -79,7 +79,14 @@ marshal_into_writer :: proc(w: io.Writer, v: any, flags := ENCODE_SMALL) -> Mars marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { e := e - err_conv(_ENCODE_PROGRESS_GUARD(&e)) or_return + if e.temp_allocator.procedure == nil { + e.temp_allocator = context.temp_allocator + } + + if .Self_Described_CBOR in e.flags { + err_conv(_encode_u64(e, TAG_SELF_DESCRIBED_CBOR, .Tag)) or_return + e.flags &~= { .Self_Described_CBOR } + } if v == nil { return _encode_nil(e.writer) @@ -321,7 +328,7 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { switch info.key.id { case string: - entries := make([dynamic]Encoded_Entry_Fast(^[]byte), 0, map_cap, context.temp_allocator) or_return + entries := make([dynamic]Encoded_Entry_Fast(^[]byte), 0, map_cap, e.temp_allocator) or_return defer delete(entries) for bucket_index in 0.. (err: Marshal_Error) { return case cstring: - entries := make([dynamic]Encoded_Entry_Fast(^cstring), 0, map_cap, context.temp_allocator) or_return + entries := make([dynamic]Encoded_Entry_Fast(^cstring), 0, map_cap, e.temp_allocator) or_return defer delete(entries) for bucket_index in 0.. (err: Marshal_Error) { return case: - entries := make([dynamic]Encoded_Entry, 0, map_cap, context.temp_allocator) or_return + entries := make([dynamic]Encoded_Entry, 0, map_cap, e.temp_allocator) or_return defer delete(entries) for bucket_index in 0.. 
(err: Marshal_Error) { name: string, field: int, } - entries := make([dynamic]Name, 0, n, context.temp_allocator) or_return + entries := make([dynamic]Name, 0, n, e.temp_allocator) or_return defer delete(entries) for name, i in info.names { @@ -530,7 +537,7 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { case reflect.Type_Info_Named: err_conv(_encode_text(e, vt.name)) or_return case: - builder := strings.builder_make(context.temp_allocator) or_return + builder := strings.builder_make(e.temp_allocator) or_return defer strings.builder_destroy(&builder) reflect.write_type(&builder, vti) err_conv(_encode_text(e, strings.to_string(builder))) or_return diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index eef5d3d99..6e7f3c0bb 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -13,7 +13,7 @@ import "core:unicode/utf8" Unmarshals the given CBOR into the given pointer using reflection. Types that require allocation are allocated using the given allocator. -Some temporary allocations are done on the `context.temp_allocator`, but, if you want to, +Some temporary allocations are done on the given `temp_allocator`, but, if you want to, this can be set to a "normal" allocator, because the necessary `delete` and `free` calls are still made. This is helpful when the CBOR size is so big that you don't want to collect all the temporary allocations until the end. 
@@ -31,8 +31,8 @@ unmarshal :: proc { unmarshal_from_string, } -unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator) -> (err: Unmarshal_Error) { - err = unmarshal_from_decoder(Decoder{ DEFAULT_MAX_PRE_ALLOC, flags, r }, ptr, allocator=allocator) +unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator) -> (err: Unmarshal_Error) { + err = unmarshal_from_decoder(Decoder{ DEFAULT_MAX_PRE_ALLOC, flags, r }, ptr, allocator, temp_allocator) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. if err == .EOF { err = .Unexpected_EOF } @@ -40,23 +40,21 @@ unmarshal_from_reader :: proc(r: io.Reader, ptr: ^$T, flags := Decoder_Flags{}, } // Unmarshals from a string, see docs on the proc group `Unmarshal` for more info. -unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator) -> (err: Unmarshal_Error) { +unmarshal_from_string :: proc(s: string, ptr: ^$T, flags := Decoder_Flags{}, allocator := context.allocator, temp_allocator := context.temp_allocator) -> (err: Unmarshal_Error) { sr: strings.Reader r := strings.to_reader(&sr, s) - err = unmarshal_from_reader(r, ptr, flags, allocator) + err = unmarshal_from_reader(r, ptr, flags, allocator, temp_allocator) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. 
if err == .EOF { err = .Unexpected_EOF } return } -unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator) -> (err: Unmarshal_Error) { +unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.allocator, temp_allocator := context.temp_allocator) -> (err: Unmarshal_Error) { d := d - _DECODE_PROGRESS_GUARD(&d) - - err = _unmarshal_any_ptr(d, ptr, allocator=allocator) + err = _unmarshal_any_ptr(d, ptr, nil, allocator, temp_allocator) // Normal EOF does not exist here, we try to read the exact amount that is said to be provided. if err == .EOF { err = .Unexpected_EOF } @@ -64,8 +62,9 @@ unmarshal_from_decoder :: proc(d: Decoder, ptr: ^$T, allocator := context.alloca } -_unmarshal_any_ptr :: proc(d: Decoder, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator) -> Unmarshal_Error { +_unmarshal_any_ptr :: proc(d: Decoder, v: any, hdr: Maybe(Header) = nil, allocator := context.allocator, temp_allocator := context.temp_allocator) -> Unmarshal_Error { context.allocator = allocator + context.temp_allocator = temp_allocator v := v if v == nil || v.id == nil { diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 8262e5da4..60c122a69 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -855,7 +855,7 @@ expect_float :: proc(t: ^testing.T, encoded: string, expected: $T, loc := #calle buf: bytes.Buffer stream := bytes.buffer_to_stream(&buf) -encoder := cbor.Encoder{cbor.ENCODE_FULLY_DETERMINISTIC, stream} +encoder := cbor.Encoder{cbor.ENCODE_FULLY_DETERMINISTIC, stream, {}} expect_encoding :: proc(t: ^testing.T, val: cbor.Value, encoded: string, loc := #caller_location) { bytes.buffer_reset(&buf) From 2a39c60fe4988339a910828ba6dcb022e3086d7a Mon Sep 17 00:00:00 2001 From: Laytan Date: Tue, 6 Feb 2024 20:37:19 +0100 Subject: [PATCH 21/66] encoding/cbor: respect default to panic allocator --- 
core/encoding/cbor/tags.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index c9ddaed56..040ce2458 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -96,7 +96,7 @@ tag_register_number :: proc(impl: Tag_Implementation, nr: Tag_Number, id: string // Controls initialization of default tag implementations. // JS and WASI default to a panic allocator so we don't want to do it on those. -INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, !ODIN_DEFAULT_TO_NIL_ALLOCATOR && ODIN_OS != .JS && ODIN_OS != .WASI) +INITIALIZE_DEFAULT_TAGS :: #config(CBOR_INITIALIZE_DEFAULT_TAGS, !ODIN_DEFAULT_TO_PANIC_ALLOCATOR && !ODIN_DEFAULT_TO_NIL_ALLOCATOR) @(private, init, disabled=!INITIALIZE_DEFAULT_TAGS) tags_initialize_defaults :: proc() { From 9fc8587e2c2bff33a063531ef9045d48dc4a587e Mon Sep 17 00:00:00 2001 From: Laytan Date: Tue, 6 Feb 2024 20:41:53 +0100 Subject: [PATCH 22/66] encoding/cbor: untouch net/common.odin --- core/net/common.odin | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/net/common.odin b/core/net/common.odin index 3cd1459a6..2a6f44602 100644 --- a/core/net/common.odin +++ b/core/net/common.odin @@ -413,5 +413,4 @@ DNS_Record_Header :: struct #packed { DNS_Host_Entry :: struct { name: string, addr: Address, -} - +} \ No newline at end of file From 04bd3cc525e5ef366043ace552bd0f3aa7cdd4b8 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Mon, 12 Feb 2024 20:17:24 +0100 Subject: [PATCH 23/66] encoding/cbor: rename `diagnose` to `to_diagnostic_format` to be clearer --- core/encoding/cbor/cbor.odin | 24 ++++++++++---------- core/encoding/cbor/doc.odin | 4 ++-- tests/core/encoding/cbor/test_core_cbor.odin | 14 ++++++------ 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index f879a11aa..defae4163 100644 --- a/core/encoding/cbor/cbor.odin +++ 
b/core/encoding/cbor/cbor.odin @@ -307,23 +307,23 @@ destroy :: proc(val: Value, allocator := context.allocator) { } /* -diagnose either writes or returns a human-readable representation of the value, -optionally formatted, defined as the diagnostic format in section 8 of RFC 8949. +to_diagnostic_format either writes or returns a human-readable representation of the value, +optionally formatted, defined as the diagnostic format in [[RFC 8949 Section 8;https://www.rfc-editor.org/rfc/rfc8949.html#name-diagnostic-notation]]. Incidentally, if the CBOR does not contain any of the additional types defined on top of JSON this will also be valid JSON. */ -diagnose :: proc { - diagnostic_string, - diagnose_to_writer, +to_diagnostic_format :: proc { + to_diagnostic_format_string, + to_diagnostic_format_writer, } // Turns the given CBOR value into a human-readable string. // See docs on the proc group `diagnose` for more info. -diagnostic_string :: proc(val: Value, padding := 0, allocator := context.allocator) -> (string, mem.Allocator_Error) #optional_allocator_error { +to_diagnostic_format_string :: proc(val: Value, padding := 0, allocator := context.allocator) -> (string, mem.Allocator_Error) #optional_allocator_error { b := strings.builder_make(allocator) w := strings.to_stream(&b) - err := diagnose_to_writer(w, val, padding) + err := to_diagnostic_format_writer(w, val, padding) if err == .EOF { // The string builder stream only returns .EOF, and only if it can't write (out of memory). return "", .Out_Of_Memory @@ -335,7 +335,7 @@ diagnostic_string :: proc(val: Value, padding := 0, allocator := context.allocat // Writes the given CBOR value into the writer as human-readable text. // See docs on the proc group `diagnose` for more info. 
-diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { +to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { @(require_results) indent :: proc(padding: int) -> int { padding := padding @@ -421,7 +421,7 @@ diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { newline(w, padding) or_return for entry, i in v { - diagnose(w, entry, padding) or_return + to_diagnostic_format(w, entry, padding) or_return if i != len(v)-1 { comma(w, padding) or_return newline(w, padding) or_return @@ -444,9 +444,9 @@ diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { newline(w, padding) or_return for entry, i in v { - diagnose(w, entry.key, padding) or_return + to_diagnostic_format(w, entry.key, padding) or_return io.write_string(w, ": ") or_return - diagnose(w, entry.value, padding) or_return + to_diagnostic_format(w, entry.value, padding) or_return if i != len(v)-1 { comma(w, padding) or_return newline(w, padding) or_return @@ -460,7 +460,7 @@ diagnose_to_writer :: proc(w: io.Writer, val: Value, padding := 0) -> io.Error { case ^Tag: io.write_u64(w, v.number) or_return io.write_string(w, "(") or_return - diagnose(w, v.value, padding) or_return + to_diagnostic_format(w, v.value, padding) or_return io.write_string(w, ")") or_return case Simple: io.write_string(w, "simple(") or_return diff --git a/core/encoding/cbor/doc.odin b/core/encoding/cbor/doc.odin index ee8ba23a0..77eac51cb 100644 --- a/core/encoding/cbor/doc.odin +++ b/core/encoding/cbor/doc.odin @@ -117,8 +117,8 @@ Example: assert(derr == nil) defer cbor.destroy(decoded) - // Turn the CBOR into a human readable representation. - diagnosis, eerr := cbor.diagnose(decoded) + // Turn the CBOR into a human readable representation defined as the diagnostic format in [[RFC 8949 Section 8;https://www.rfc-editor.org/rfc/rfc8949.html#name-diagnostic-notation]]. 
+ diagnosis, eerr := cbor.to_diagnostic_format(decoded) assert(eerr == nil) defer delete(diagnosis) diff --git a/tests/core/encoding/cbor/test_core_cbor.odin b/tests/core/encoding/cbor/test_core_cbor.odin index 60c122a69..72244e1d3 100644 --- a/tests/core/encoding/cbor/test_core_cbor.odin +++ b/tests/core/encoding/cbor/test_core_cbor.odin @@ -212,7 +212,7 @@ test_marshalling :: proc(t: ^testing.T) { ev(t, derr, nil) defer cbor.destroy(decoded) - diagnosis, eerr := cbor.diagnose(decoded) + diagnosis, eerr := cbor.to_diagnostic_format(decoded) ev(t, eerr, nil) defer delete(diagnosis) @@ -379,7 +379,7 @@ test_marshalling_maybe :: proc(t: ^testing.T) { val, derr := cbor.decode(string(data)) expect_value(t, derr, nil) - expect_value(t, cbor.diagnose(val), "1") + expect_value(t, cbor.to_diagnostic_format(val), "1") maybe_dest: Maybe(int) uerr := cbor.unmarshal(string(data), &maybe_dest) @@ -396,7 +396,7 @@ test_marshalling_nil_maybe :: proc(t: ^testing.T) { val, derr := cbor.decode(string(data)) expect_value(t, derr, nil) - expect_value(t, cbor.diagnose(val), "nil") + expect_value(t, cbor.to_diagnostic_format(val), "nil") maybe_dest: Maybe(int) uerr := cbor.unmarshal(string(data), &maybe_dest) @@ -432,7 +432,7 @@ test_marshalling_union :: proc(t: ^testing.T) { val, derr := cbor.decode(string(data)) expect_value(t, derr, nil) - expect_value(t, cbor.diagnose(val, -1), `1010(["My_Distinct", "Hello, World!"])`) + expect_value(t, cbor.to_diagnostic_format(val, -1), `1010(["My_Distinct", "Hello, World!"])`) dest: My_Union uerr := cbor.unmarshal(string(data), &dest) @@ -455,7 +455,7 @@ test_marshalling_union :: proc(t: ^testing.T) { val, derr := cbor.decode(string(data)) expect_value(t, derr, nil) - expect_value(t, cbor.diagnose(val, -1), `1010(["My_Struct", {"my_enum": 1}])`) + expect_value(t, cbor.to_diagnostic_format(val, -1), `1010(["My_Struct", {"my_enum": 1}])`) dest: My_Union_No_Nil uerr := cbor.unmarshal(string(data), &dest) @@ -810,7 +810,7 @@ expect_decoding :: 
proc(t: ^testing.T, encoded: string, decoded: string, type: t expect_value(t, reflect.union_variant_typeid(res), type, loc) expect_value(t, err, nil, loc) - str := cbor.diagnose(res, padding=-1) + str := cbor.to_diagnostic_format(res, padding=-1) defer delete(str) expect_value(t, str, decoded, loc) @@ -825,7 +825,7 @@ expect_tag :: proc(t: ^testing.T, encoded: string, nr: cbor.Tag_Number, value_de if tag, is_tag := res.(^cbor.Tag); is_tag { expect_value(t, tag.number, nr, loc) - str := cbor.diagnose(tag, padding=-1) + str := cbor.to_diagnostic_format(tag, padding=-1) defer delete(str) expect_value(t, str, value_decoded, loc) From 9a5f3fed8c89bccededf80308c7c6213ae760792 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Mon, 4 Mar 2024 17:26:49 +0100 Subject: [PATCH 24/66] encoding/cbor: fix conflict --- tests/core/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/core/Makefile b/tests/core/Makefile index 3fa38cd34..6c5df7f66 100644 --- a/tests/core/Makefile +++ b/tests/core/Makefile @@ -51,7 +51,6 @@ noise_test: $(ODIN) run math/noise $(COMMON) -out:test_noise encoding_test: -<<<<<<< HEAD $(ODIN) run encoding/hxa $(COMMON) $(COLLECTION) -out:test_hxa $(ODIN) run encoding/json $(COMMON) -out:test_json $(ODIN) run encoding/varint $(COMMON) -out:test_varint From d5bb67e9e65caa14c962e796e57cb81478ef35f6 Mon Sep 17 00:00:00 2001 From: tim4242 Date: Fri, 12 Apr 2024 20:47:02 +0200 Subject: [PATCH 25/66] Fixup vendor/d3d12 calling convention mistakes --- vendor/directx/d3d12/d3d12.odin | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/vendor/directx/d3d12/d3d12.odin b/vendor/directx/d3d12/d3d12.odin index c39c2c9c9..7c4065d8b 100644 --- a/vendor/directx/d3d12/d3d12.odin +++ b/vendor/directx/d3d12/d3d12.odin @@ -2283,7 +2283,7 @@ IHeap :: struct #raw_union { } IHeap_VTable :: struct { using id3d12devicechild_vtable: IDeviceChild_VTable, - GetDesc: proc "system" (this: ^IHeap) -> HEAP_DESC, + GetDesc: proc "system" (this: ^IHeap, 
pRetValue: ^HEAP_DESC) -> ^HEAP_DESC, } @@ -2297,7 +2297,7 @@ IResource_VTable :: struct { using id3d12devicechild_vtable: IDeviceChild_VTable, Map: proc "system" (this: ^IResource, Subresource: u32, pReadRange: ^RANGE, ppData: ^rawptr) -> HRESULT, Unmap: proc "system" (this: ^IResource, Subresource: u32, pWrittenRange: ^RANGE), - GetDesc: proc "system" (this: ^IResource) -> RESOURCE_DESC, + GetDesc: proc "system" (this: ^IResource, pRetValue: ^RESOURCE_DESC) -> ^RESOURCE_DESC, GetGPUVirtualAddress: proc "system" (this: ^IResource) -> GPU_VIRTUAL_ADDRESS, WriteToSubresource: proc "system" (this: ^IResource, DstSubresource: u32, pDstBox: ^BOX, pSrcData: rawptr, SrcRowPitch: u32, SrcDepthPitch: u32) -> HRESULT, ReadFromSubresource: proc "system" (this: ^IResource, pDstData: rawptr, DstRowPitch: u32, DstDepthPitch: u32, SrcSubresource: u32, pSrcBox: ^BOX) -> HRESULT, @@ -2514,7 +2514,7 @@ ICommandQueue_VTable :: struct { Wait: proc "system" (this: ^ICommandQueue, pFence: ^IFence, Value: u64) -> HRESULT, GetTimestampFrequency: proc "system" (this: ^ICommandQueue, pFrequency: ^u64) -> HRESULT, GetClockCalibration: proc "system" (this: ^ICommandQueue, pGpuTimestamp: ^u64, pCpuTimestamp: ^u64) -> HRESULT, - GetDesc: proc "system" (this: ^ICommandQueue) -> COMMAND_QUEUE_DESC, + GetDesc: proc "system" (this: ^ICommandQueue, pRetVal: ^COMMAND_QUEUE_DESC) -> ^COMMAND_QUEUE_DESC, } @@ -2712,7 +2712,7 @@ IProtectedResourceSession :: struct #raw_union { } IProtectedResourceSession_VTable :: struct { using id3d12protectedsession_vtable: IProtectedSession_VTable, - GetDesc: proc "system" (this: ^IProtectedResourceSession) -> PROTECTED_RESOURCE_SESSION_DESC, + GetDesc: proc "system" (this: ^IProtectedResourceSession, pRetVal: ^PROTECTED_RESOURCE_SESSION_DESC) -> ^PROTECTED_RESOURCE_SESSION_DESC, } @@ -3492,7 +3492,7 @@ IProtectedResourceSession1 :: struct #raw_union { } IProtectedResourceSession1_VTable :: struct { using id3d12protectedresourcesession_vtable: 
IProtectedResourceSession_VTable, - GetDesc1: proc "system" (this: ^IProtectedResourceSession1) -> PROTECTED_RESOURCE_SESSION_DESC1, + GetDesc1: proc "system" (this: ^IProtectedResourceSession1, pRetVal: ^PROTECTED_RESOURCE_SESSION_DESC1) -> ^PROTECTED_RESOURCE_SESSION_DESC1, } @@ -3545,7 +3545,7 @@ IResource2 :: struct #raw_union { } IResource2_VTable :: struct { using id3d12resource1_vtable: IResource1_VTable, - GetDesc1: proc "system" (this: ^IResource2) -> RESOURCE_DESC1, + GetDesc1: proc "system" (this: ^IResource2, pRetVal: ^RESOURCE_DESC1) -> ^RESOURCE_DESC1, } From a0cff82320840e4be6d40cee8a2432645773a53e Mon Sep 17 00:00:00 2001 From: "Maurizio M. Gavioli" Date: Sun, 14 Apr 2024 17:18:08 +0200 Subject: [PATCH 26/66] Fix the format of some `doc.odin` files of the `core` library which did not make it into the documentation. `c/frontend/tokenizer`: add proper "Example:" header to demo example code, removed empty lines. `container/bit_array`: moved comment before package; aligned narrative lines to left margin; converted case lines into bulleted lines ("- "); converted individual examples to single-tab-indented preformatted text. `dynlib`: removed "//+build ignore" line; added newline at EOF. `image/netpbm`: converted indented lines of "Reading", "Writing" and "Some syntax..." into bulleted lists; "Formats" indented lines kept as they are as the preformatted text seems relevant to keep the alignments; doubly indented lines kept as single-indented to keep them different (as the format does not allow for two-level bulleted lists); removed empty lines.
`os/os2`: WIP, not modified `sys/info`: removed "//+build ignore" line; converted tab-indented initial description into regular left-margin comment; moved uncommented sample code within the doc comment as an "Example:"; moved simple- and double-tabbed separate comments with sample Windows and macOS outputs within the doc comment as bulleted headlines with preformatted output listings; removed now empty comments and blank lines after the package line. `text/i18n`: removed "//+build ignore" line; moved the package line at the end; de-indented the tab-indented introductory narrative; moved sample code comments into the doc comment as tab-indented code with a proper "Example:" heading; removed "```" MD attempts at code formatting. `text/table`: unindented the comment lines of a descriptive kind; headlines of major subdivisions are marked as bold; kept code samples as tab-indented preformatted text (as there are several of them, the standard "Example:" and "Output:" headings cannot be used) removing the "```" MD attempts at code formatting; removed in-between blank lines.
--- core/c/frontend/tokenizer/doc.odin | 45 ++++---- core/container/bit_array/doc.odin | 81 +++++++------- core/dynlib/doc.odin | 5 +- core/image/netpbm/doc.odin | 39 ++++--- core/sys/info/doc.odin | 128 ++++++++++----------- core/text/i18n/doc.odin | 171 ++++++++++++++--------------- core/text/table/doc.odin | 31 ++---- 7 files changed, 240 insertions(+), 260 deletions(-) diff --git a/core/c/frontend/tokenizer/doc.odin b/core/c/frontend/tokenizer/doc.odin index 9b1734fc4..43747dfe8 100644 --- a/core/c/frontend/tokenizer/doc.odin +++ b/core/c/frontend/tokenizer/doc.odin @@ -1,34 +1,31 @@ /* -package demo +Example: + package demo -import tokenizer "core:c/frontend/tokenizer" -import preprocessor "core:c/frontend/preprocessor" -import "core:fmt" + import tokenizer "core:c/frontend/tokenizer" + import preprocessor "core:c/frontend/preprocessor" + import "core:fmt" -main :: proc() { - t := &tokenizer.Tokenizer{}; - tokenizer.init_defaults(t); + main :: proc() { + t := &tokenizer.Tokenizer{}; + tokenizer.init_defaults(t); - cpp := &preprocessor.Preprocessor{}; - cpp.warn, cpp.err = t.warn, t.err; - preprocessor.init_lookup_tables(cpp); - preprocessor.init_default_macros(cpp); - cpp.include_paths = {"my/path/to/include"}; + cpp := &preprocessor.Preprocessor{}; + cpp.warn, cpp.err = t.warn, t.err; + preprocessor.init_lookup_tables(cpp); + preprocessor.init_default_macros(cpp); + cpp.include_paths = {"my/path/to/include"}; - tok := tokenizer.tokenize_file(t, "the/source/file.c", 1); + tok := tokenizer.tokenize_file(t, "the/source/file.c", 1); - tok = preprocessor.preprocess(cpp, tok); - if tok != nil { - for t := tok; t.kind != .EOF; t = t.next { - fmt.println(t.lit); + tok = preprocessor.preprocess(cpp, tok); + if tok != nil { + for t := tok; t.kind != .EOF; t = t.next { + fmt.println(t.lit); + } } + + fmt.println("[Done]"); } - - fmt.println("[Done]"); -} */ - - package c_frontend_tokenizer - - diff --git a/core/container/bit_array/doc.odin 
b/core/container/bit_array/doc.odin index 52e252d8a..371f63f0e 100644 --- a/core/container/bit_array/doc.odin +++ b/core/container/bit_array/doc.odin @@ -1,53 +1,52 @@ -package dynamic_bit_array - /* - The Bit Array can be used in several ways: +The Bit Array can be used in several ways: - -- By default you don't need to instantiate a Bit Array: +- By default you don't need to instantiate a Bit Array: - package test + package test - import "core:fmt" - import "core:container/bit_array" + import "core:fmt" + import "core:container/bit_array" - main :: proc() { - using bit_array + main :: proc() { + using bit_array - bits: Bit_Array + bits: Bit_Array - // returns `true` - fmt.println(set(&bits, 42)) + // returns `true` + fmt.println(set(&bits, 42)) - // returns `false`, `false`, because this Bit Array wasn't created to allow negative indices. - was_set, was_retrieved := get(&bits, -1) - fmt.println(was_set, was_retrieved) - destroy(&bits) + // returns `false`, `false`, because this Bit Array wasn't created to allow negative indices. 
+ was_set, was_retrieved := get(&bits, -1) + fmt.println(was_set, was_retrieved) + destroy(&bits) + } + +- A Bit Array can optionally allow for negative indices, if the minimum value was given during creation: + + package test + + import "core:fmt" + import "core:container/bit_array" + + main :: proc() { + Foo :: enum int { + Negative_Test = -42, + Bar = 420, + Leaves = 69105, } - -- A Bit Array can optionally allow for negative indices, if the mininum value was given during creation: + using bit_array - package test + bits := create(int(max(Foo)), int(min(Foo))) + defer destroy(bits) - import "core:fmt" - import "core:container/bit_array" - - main :: proc() { - Foo :: enum int { - Negative_Test = -42, - Bar = 420, - Leaves = 69105, - } - - using bit_array - - bits := create(int(max(Foo)), int(min(Foo))) - defer destroy(bits) - - fmt.printf("Set(Bar): %v\n", set(bits, Foo.Bar)) - fmt.printf("Get(Bar): %v, %v\n", get(bits, Foo.Bar)) - fmt.printf("Set(Negative_Test): %v\n", set(bits, Foo.Negative_Test)) - fmt.printf("Get(Leaves): %v, %v\n", get(bits, Foo.Leaves)) - fmt.printf("Get(Negative_Test): %v, %v\n", get(bits, Foo.Negative_Test)) - fmt.printf("Freed.\n") - } -*/ \ No newline at end of file + fmt.printf("Set(Bar): %v\n", set(bits, Foo.Bar)) + fmt.printf("Get(Bar): %v, %v\n", get(bits, Foo.Bar)) + fmt.printf("Set(Negative_Test): %v\n", set(bits, Foo.Negative_Test)) + fmt.printf("Get(Leaves): %v, %v\n", get(bits, Foo.Leaves)) + fmt.printf("Get(Negative_Test): %v, %v\n", get(bits, Foo.Negative_Test)) + fmt.printf("Freed.\n") + } +*/ +package dynamic_bit_array diff --git a/core/dynlib/doc.odin b/core/dynlib/doc.odin index 849e03a71..f5c91c54e 100644 --- a/core/dynlib/doc.odin +++ b/core/dynlib/doc.odin @@ -1,6 +1,5 @@ -//+build ignore /* -Package core:dynlib implements loading of shared libraries/DLLs and their symbols. +Package `core:dynlib` implements loading of shared libraries/DLLs and their symbols. 
The behaviour of dynamically loaded libraries is specific to the target platform of the program. For in depth detail on the underlying behaviour please refer to your target platform's documentation. @@ -8,4 +7,4 @@ For in depth detail on the underlying behaviour please refer to your target plat See `example` directory for an example library exporting 3 symbols and a host program loading them automatically by defining a symbol table struct. */ -package dynlib \ No newline at end of file +package dynlib diff --git a/core/image/netpbm/doc.odin b/core/image/netpbm/doc.odin index 1b5b46856..7106e023e 100644 --- a/core/image/netpbm/doc.odin +++ b/core/image/netpbm/doc.odin @@ -1,5 +1,6 @@ /* Formats: + PBM (P1, P4): Portable Bit Map, stores black and white images (1 channel) PGM (P2, P5): Portable Gray Map, stores greyscale images (1 channel, 1 or 2 bytes per value) PPM (P3, P6): Portable Pixel Map, stores colour images (3 channel, 1 or 2 bytes per value) @@ -7,27 +8,29 @@ Formats: PFM (Pf, PF): Portable Float Map, stores floating-point images (Pf: 1 channel, PF: 3 channel) Reading: - All formats fill out header fields `format`, `width`, `height`, `channels`, `depth` - Specific formats use more fields - PGM, PPM, and PAM set `maxval` (maximum of 65535) - PAM sets `tupltype` if there is one, and can set `channels` to any value (not just 1 or 3) - PFM sets `scale` (float equivalent of `maxval`) and `little_endian` (endianness of stored floats) - Currently doesn't support reading multiple images from one binary-format file + +- All formats fill out header fields `format`, `width`, `height`, `channels`, `depth`. +- Specific formats use more fields: + PGM, PPM, and PAM set `maxval` (maximum of 65535) + PAM sets `tupltype` if there is one, and can set `channels` to any value (not just 1 or 3) + PFM sets `scale` (float equivalent of `maxval`) and `little_endian` (endianness of stored floats) +- Currently doesn't support reading multiple images from one binary-format file. 
Writing: - You can use your own `Netpbm_Info` struct to control how images are written - All formats require the header field `format` to be specified - Additional header fields are required for specific formats - PGM, PPM, and PAM require `maxval` (maximum of 65535) - PAM also uses `tupltype`, though it may be left as default (empty or nil string) - PFM requires `scale`, and optionally `little_endian` + +- You can use your own `Netpbm_Info` struct to control how images are written. +- All formats require the header field `format` to be specified. +- Additional header fields are required for specific formats: + PGM, PPM, and PAM require `maxval` (maximum of 65535) + PAM also uses `tupltype`, though it may be left as default (empty or nil string) + PFM requires `scale`, and optionally `little_endian` Some syntax differences from the specifications: - `channels` stores the number of values per pixel, what the PAM specification calls `depth` - `depth` instead is the number of bits for a single value (32 for PFM, 16 or 8 otherwise) - `scale` and `little_endian` are separated, so the `header` will always store a positive `scale` - `little_endian` will only be true for a negative `scale` PFM, every other format will be false - `little_endian` only describes the netpbm data being read/written, the image buffer will be native -*/ +- `channels` stores the number of values per pixel, what the PAM specification calls `depth` +- `depth` instead is the number of bits for a single value (32 for PFM, 16 or 8 otherwise) +- `scale` and `little_endian` are separated, so the `header` will always store a positive `scale` +- `little_endian` will only be true for a negative `scale` PFM, every other format will be false +- `little_endian` only describes the netpbm data being read/written, the image buffer will be native +*/ package netpbm diff --git a/core/sys/info/doc.odin b/core/sys/info/doc.odin index 81c3fb342..15af0d4b3 100644 --- a/core/sys/info/doc.odin +++ 
b/core/sys/info/doc.odin @@ -1,78 +1,78 @@ /* - Copyright 2022 Jeroen van Rijn . - Made available under Odin's BSD-3 license. +Copyright 2022 Jeroen van Rijn . +Made available under Odin's BSD-3 license. - Package `core:sys/info` gathers system information on: - Windows, Linux, macOS, FreeBSD & OpenBSD. +Package `core:sys/info` gathers system information on: +Windows, Linux, macOS, FreeBSD & OpenBSD. - Simply import the package and you'll have access to the OS version, RAM amount - and CPU information. +Simply import the package and you'll have access to the OS version, RAM amount +and CPU information. - On Windows, GPUs will also be enumerated using the registry. +On Windows, GPUs will also be enumerated using the registry. - CPU feature flags can be tested against `cpu_features`, where applicable, e.g. - `if .aes in si.aes { ... }` -*/ -//+build ignore -package sysinfo +CPU feature flags can be tested against `cpu_features`, where applicable, e.g. +`if .aes in si.aes { ... }` -import "core:fmt" -import si "core:sys/info" +Example: -main :: proc() { - fmt.printf("Odin: %v\n", ODIN_VERSION) - fmt.printf("OS: %v\n", si.os_version.as_string) - fmt.printf("OS: %#v\n", si.os_version) - fmt.printf("CPU: %v\n", si.cpu_name) - fmt.printf("RAM: %v MiB\n", si.ram.total_ram / 1024 / 1024) + import "core:fmt" + import si "core:sys/info" - fmt.println() - for gpu, i in si.gpus { - fmt.printf("GPU #%v:\n", i) - fmt.printf("\tVendor: %v\n", gpu.vendor_name) - fmt.printf("\tModel: %v\n", gpu.model_name) - fmt.printf("\tVRAM: %v MiB\n", gpu.total_ram / 1024 / 1024) + main :: proc() { + fmt.printf("Odin: %v\n", ODIN_VERSION) + fmt.printf("OS: %v\n", si.os_version.as_string) + fmt.printf("OS: %#v\n", si.os_version) + fmt.printf("CPU: %v\n", si.cpu_name) + fmt.printf("RAM: %v MiB\n", si.ram.total_ram / 1024 / 1024) + + fmt.println() + for gpu, i in si.gpus { + fmt.printf("GPU #%v:\n", i) + fmt.printf("\tVendor: %v\n", gpu.vendor_name) + fmt.printf("\tModel: %v\n", gpu.model_name) + 
fmt.printf("\tVRAM: %v MiB\n", gpu.total_ram / 1024 / 1024) + } } -} -/* - Example Windows output: - Odin: dev-2022-09 - OS: Windows 10 Professional (version: 20H2), build: 19042.1466 - OS: OS_Version{ - platform = "Windows", - major = 10, - minor = 0, +- Example Windows output: + + Odin: dev-2022-09 + OS: Windows 10 Professional (version: 20H2), build: 19042.1466 + OS: OS_Version{ + platform = "Windows", + major = 10, + minor = 0, + patch = 0, + build = [ + 19042, + 1466, + ], + version = "20H2", + as_string = "Windows 10 Professional (version: 20H2), build: 19042.1466", + } + CPU: AMD Ryzen 7 1800X Eight-Core Processor + RAM: 65469 MiB + GPU #0: + Vendor: Advanced Micro Devices, Inc. + Model: Radeon RX Vega + VRAM: 8176 MiB + +- Example macOS output: + + ODIN: dev-2022-09 + OS: OS_Version{ + platform = "MacOS", + major = 21, + minor = 5, patch = 0, build = [ - 19042, - 1466, + 0, + 0, ], - version = "20H2", - as_string = "Windows 10 Professional (version: 20H2), build: 19042.1466", - } - CPU: AMD Ryzen 7 1800X Eight-Core Processor - RAM: 65469 MiB - - GPU #0: - Vendor: Advanced Micro Devices, Inc. - Model: Radeon RX Vega - VRAM: 8176 MiB - - Example macOS output: - ODIN: dev-2022-09 - OS: OS_Version{ - platform = "MacOS", - major = 21, - minor = 5, - patch = 0, - build = [ - 0, - 0, - ], - version = "21F79", - as_string = "macOS Monterey 12.4 (build 21F79, kernel 21.5.0)", - } - CPU: Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz - RAM: 8192 MiB + version = "21F79", + as_string = "macOS Monterey 12.4 (build 21F79, kernel 21.5.0)", + } + CPU: Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz + RAM: 8192 MiB */ +package sysinfo diff --git a/core/text/i18n/doc.odin b/core/text/i18n/doc.odin index ef619451e..54bf8b80f 100644 --- a/core/text/i18n/doc.odin +++ b/core/text/i18n/doc.odin @@ -1,111 +1,106 @@ -//+build ignore -package i18n /* - The i18n package is flexible and easy to use. +The `i18n` package is flexible and easy to use. 
- It has one call to get a translation: `get`, which the user can alias into something like `T`. +It has one call to get a translation: `get`, which the user can alias into something like `T`. - `get`, referred to as `T` here, has a few different signatures. - All of them will return the key if the entry can't be found in the active translation catalog. +`get`, referred to as `T` here, has a few different signatures. +All of them will return the key if the entry can't be found in the active translation catalog. - - `T(key)` returns the translation of `key`. - - `T(key, n)` returns a pluralized translation of `key` according to value `n`. +- `T(key)` returns the translation of `key`. +- `T(key, n)` returns a pluralized translation of `key` according to value `n`. - - `T(section, key)` returns the translation of `key` in `section`. - - `T(section, key, n)` returns a pluralized translation of `key` in `section` according to value `n`. +- `T(section, key)` returns the translation of `key` in `section`. +- `T(section, key, n)` returns a pluralized translation of `key` in `section` according to value `n`. - By default lookup take place in the global `i18n.ACTIVE` catalog for ease of use. - If you want to override which translation to use, for example in a language preview dialog, you can use the following: +By default lookup take place in the global `i18n.ACTIVE` catalog for ease of use. +If you want to override which translation to use, for example in a language preview dialog, you can use the following: - - `T(key, n, catalog)` returns the pluralized version of `key` from explictly supplied catalog. - - `T(section, key, n, catalog)` returns the pluralized version of `key` in `section` from explictly supplied catalog. +- `T(key, n, catalog)` returns the pluralized version of `key` from explictly supplied catalog. +- `T(section, key, n, catalog)` returns the pluralized version of `key` in `section` from explictly supplied catalog. 
- If a catalog has translation contexts or sections, then ommitting it in the above calls looks up in section "". +If a catalog has translation contexts or sections, then omitting it in the above calls looks up in section "". - The default pluralization rule is n != 1, which is to say that passing n == 1 (or not passing n) returns the singular form. - Passing n != 1 returns plural form 1. +The default pluralization rule is n != 1, which is to say that passing n == 1 (or not passing n) returns the singular form. +Passing n != 1 returns plural form 1. - Should a language not conform to this rule, you can pass a pluralizer procedure to the catalog parser. - This is a procedure that maps an integer to an integer, taking a value and returning which plural slot should be used. +Should a language not conform to this rule, you can pass a pluralizer procedure to the catalog parser. +This is a procedure that maps an integer to an integer, taking a value and returning which plural slot should be used. - You can also assign it to a loaded catalog after parsing, of course. +You can also assign it to a loaded catalog after parsing, of course. - Some code examples follow. -*/ +Example: -/* -```cpp -import "core:fmt" -import "core:text/i18n" + import "core:fmt" + import "core:text/i18n" -T :: i18n.get + T :: i18n.get -mo :: proc() { - using fmt + mo :: proc() { + using fmt - err: i18n.Error + err: i18n.Error - /* - Parse MO file and set it as the active translation so we can omit `get`'s "catalog" parameter. - */ - i18n.ACTIVE, err = i18n.parse_mo(#load("translations/nl_NL.mo")) - defer i18n.destroy() + /* + Parse MO file and set it as the active translation so we can omit `get`'s "catalog" parameter. + */ + i18n.ACTIVE, err = i18n.parse_mo(#load("translations/nl_NL.mo")) + defer i18n.destroy() - if err != .None { return } + if err != .None { return } - /* - These are in the .MO catalog. 
- */ - println("-----") - println(T("")) - println("-----") - println(T("There are 69,105 leaves here.")) - println("-----") - println(T("Hellope, World!")) - println("-----") - // We pass 1 into `T` to get the singular format string, then 1 again into printf. - printf(T("There is %d leaf.\n", 1), 1) - // We pass 42 into `T` to get the plural format string, then 42 again into printf. - printf(T("There is %d leaf.\n", 42), 42) + /* + These are in the .MO catalog. + */ + println("-----") + println(T("")) + println("-----") + println(T("There are 69,105 leaves here.")) + println("-----") + println(T("Hellope, World!")) + println("-----") + // We pass 1 into `T` to get the singular format string, then 1 again into printf. + printf(T("There is %d leaf.\n", 1), 1) + // We pass 42 into `T` to get the plural format string, then 42 again into printf. + printf(T("There is %d leaf.\n", 42), 42) - /* - This isn't in the translation catalog, so the key is passed back untranslated. - */ - println("-----") - println(T("Come visit us on Discord!")) -} - -qt :: proc() { - using fmt - - err: i18n.Error - - /* - Parse QT file and set it as the active translation so we can omit `get`'s "catalog" parameter. - */ - i18n.ACTIVE, err = i18n.parse_qt(#load("translations/nl_NL-qt-ts.ts")) - defer i18n.destroy() - - if err != .None { - return + /* + This isn't in the translation catalog, so the key is passed back untranslated. + */ + println("-----") + println(T("Come visit us on Discord!")) } - /* - These are in the .TS catalog. As you can see they have sections. 
- */ - println("--- Page section ---") - println("Page:Text for translation =", T("Page", "Text for translation")) - println("-----") - println("Page:Also text to translate =", T("Page", "Also text to translate")) - println("-----") - println("--- installscript section ---") - println("installscript:99 bottles of beer on the wall =", T("installscript", "99 bottles of beer on the wall")) - println("-----") - println("--- apple_count section ---") - println("apple_count:%d apple(s) =") - println("\t 1 =", T("apple_count", "%d apple(s)", 1)) - println("\t 42 =", T("apple_count", "%d apple(s)", 42)) -} -``` -*/ \ No newline at end of file + qt :: proc() { + using fmt + + err: i18n.Error + + /* + Parse QT file and set it as the active translation so we can omit `get`'s "catalog" parameter. + */ + i18n.ACTIVE, err = i18n.parse_qt(#load("translations/nl_NL-qt-ts.ts")) + defer i18n.destroy() + + if err != .None { + return + } + + /* + These are in the .TS catalog. As you can see they have sections. + */ + println("--- Page section ---") + println("Page:Text for translation =", T("Page", "Text for translation")) + println("-----") + println("Page:Also text to translate =", T("Page", "Also text to translate")) + println("-----") + println("--- installscript section ---") + println("installscript:99 bottles of beer on the wall =", T("installscript", "99 bottles of beer on the wall")) + println("-----") + println("--- apple_count section ---") + println("apple_count:%d apple(s) =") + println("\t 1 =", T("apple_count", "%d apple(s)", 1)) + println("\t 42 =", T("apple_count", "%d apple(s)", 42)) + } +*/ +package i18n diff --git a/core/text/table/doc.odin b/core/text/table/doc.odin index 9b5c1f932..76886bdea 100644 --- a/core/text/table/doc.odin +++ b/core/text/table/doc.odin @@ -1,11 +1,8 @@ /* - package table implements ascii/markdown/html/custom rendering of tables. +The package `table` implements ASCII/markdown/HTML/custom rendering of tables. 
- --- +**Custom rendering example:** - Custom rendering example: - - ```odin tbl := init(&Table{}) padding(tbl, 0, 1) row(tbl, "A_LONG_ENUM", "= 54,", "// A comment about A_LONG_ENUM") @@ -17,19 +14,14 @@ } io.write_byte(stdio_writer(), '\n') } - ``` - This outputs: - ``` +This outputs: + A_LONG_ENUM = 54, // A comment about A_LONG_ENUM AN_EVEN_LONGER_ENUM = 1, // A comment about AN_EVEN_LONGER_ENUM - ``` - --- +**ASCII rendering example:** - ASCII rendering example: - - ```odin tbl := init(&Table{}) defer destroy(tbl) @@ -69,10 +61,9 @@ write_ascii_table(stdio_writer(), tbl) write_markdown_table(stdio_writer(), tbl) - ``` - This outputs: - ``` +This outputs: + +-----------------------------------------------+ | This is a table caption and it is very long | +------------------+-----------------+----------+ @@ -82,19 +73,15 @@ | 000000005 | 6.283185 | | | a | bbb | c | +------------------+-----------------+----------+ - ``` - and +and - ``` | AAAAAAAAA | B | C | |:-----------------|:---------------:|---------:| | 123 | foo | | | 000000005 | 6.283185 | | | a | bbb | c | - ``` - respectively. +respectively. */ - package text_table From a0e25be1967315d1ab2cf6b937549f3ff74c71f6 Mon Sep 17 00:00:00 2001 From: flysand7 Date: Mon, 15 Apr 2024 11:16:52 +1100 Subject: [PATCH 27/66] [sys/linux]: Fix signature on wait4 syscall --- core/sys/linux/sys.odin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/sys/linux/sys.odin b/core/sys/linux/sys.odin index 869ce88e3..54a34fbea 100644 --- a/core/sys/linux/sys.odin +++ b/core/sys/linux/sys.odin @@ -787,8 +787,8 @@ exit :: proc "contextless" (code: i32) -> ! { Wait for the process to change state. Available since Linux 1.0. 
*/ -wait4 :: proc "contextless" (pid: Pid, status: ^u32, options: Wait_Options) -> (Pid, Errno) { - ret := syscall(SYS_wait4, pid, status, transmute(u32) options) +wait4 :: proc "contextless" (pid: Pid, status: ^u32, options: Wait_Options, rusage: ^RUsage) -> (Pid, Errno) { + ret := syscall(SYS_wait4, pid, status, transmute(u32) options, rusage) return errno_unwrap(ret, Pid) } From 2e29687ceeb004bfa820dd8c475786d2fed78a6d Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 15 Apr 2024 10:28:14 +0100 Subject: [PATCH 28/66] Fix #3425 --- src/error.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/error.cpp b/src/error.cpp index 8647f60b9..2e6641e3b 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -292,10 +292,11 @@ gb_internal isize show_error_on_line(TokenPos const &pos, TokenPos end, char con if (line_len > MAX_LINE_LENGTH_PADDED) { i32 left = MAX_TAB_WIDTH; - if (offset > 0) { - line_text += offset-left; - line_len -= offset-left; - offset = left+MAX_TAB_WIDTH/2; + i32 diff = gb_max(offset-left, 0); + if (diff > 0) { + line_text += diff; + line_len -= diff; + offset = left + ELLIPSIS_PADDING/2; } if (line_len > MAX_LINE_LENGTH_PADDED) { line_len = MAX_LINE_LENGTH_PADDED; @@ -304,7 +305,7 @@ gb_internal isize show_error_on_line(TokenPos const &pos, TokenPos end, char con squiggle_extra = 1; } } - if (offset > 0) { + if (diff > 0) { error_out("... 
%.*s ...", cast(i32)line_len, line_text); } else { error_out("%.*s ...", cast(i32)line_len, line_text); From a294f067a96bdc0fb696af785940ac4f8bb18f22 Mon Sep 17 00:00:00 2001 From: Feoramund <161657516+Feoramund@users.noreply.github.com> Date: Mon, 15 Apr 2024 05:28:22 -0400 Subject: [PATCH 29/66] Fix `big.internal_random_prime` with `Second_MSB_On` --- core/math/big/prime.odin | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/core/math/big/prime.odin b/core/math/big/prime.odin index b02b7cb4e..5e7c02f37 100644 --- a/core/math/big/prime.odin +++ b/core/math/big/prime.odin @@ -1247,6 +1247,20 @@ internal_random_prime :: proc(a: ^Int, size_in_bits: int, trials: int, flags := a.digit[0] |= 3 } if .Second_MSB_On in flags { + /* + Ensure there's enough space for the bit to be set. + */ + if a.used * _DIGIT_BITS < size_in_bits - 1 { + new_size := (size_in_bits - 1) / _DIGIT_BITS + + if new_size % _DIGIT_BITS > 0 { + new_size += 1 + } + + internal_grow(a, new_size) or_return + a.used = new_size + } + internal_int_bitfield_set_single(a, size_in_bits - 2) or_return } From 4bfa1ea76c93e6cb6ba234a39f8628f881a09c12 Mon Sep 17 00:00:00 2001 From: flysand7 Date: Mon, 15 Apr 2024 22:16:03 +1100 Subject: [PATCH 30/66] [sys/linux]: Fix syscall calls for open and fstat --- core/sys/linux/sys.odin | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/sys/linux/sys.odin b/core/sys/linux/sys.odin index 869ce88e3..e94d3015b 100644 --- a/core/sys/linux/sys.odin +++ b/core/sys/linux/sys.odin @@ -40,10 +40,10 @@ write :: proc "contextless" (fd: Fd, buf: []u8) -> (int, Errno) { */ open :: proc "contextless" (name: cstring, flags: Open_Flags, mode: Mode = {}) -> (Fd, Errno) { when ODIN_ARCH == .arm64 { - ret := syscall(SYS_openat, AT_FDCWD, transmute(uintptr) name, transmute(u32) mode) + ret := syscall(SYS_openat, AT_FDCWD, transmute(uintptr) name, transmute(u32) flags, transmute(u32) mode) return errno_unwrap(ret, Fd) } else { - ret := 
syscall(SYS_open, transmute(uintptr) name, transmute(u32) mode) + ret := syscall(SYS_open, transmute(uintptr) name, transmute(u32) flags, transmute(u32) mode) return errno_unwrap(ret, Fd) } } @@ -91,10 +91,10 @@ stat :: proc "contextless" (filename: cstring, stat: ^Stat) -> (Errno) { */ fstat :: proc "contextless" (fd: Fd, stat: ^Stat) -> (Errno) { when size_of(int) == 8 { - ret := syscall(SYS_fstat, stat) + ret := syscall(SYS_fstat, cast(i32) fd, stat) return Errno(-ret) } else { - ret := syscall(SYS_fstat64, stat) + ret := syscall(SYS_fstat64, cast(i32) fd, stat) return Errno(-ret) } } From 7e582dd671addfd2119b0a9016635cdaddc6b22f Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 15 Apr 2024 12:43:27 +0100 Subject: [PATCH 31/66] Add basic suggestion to missing `package` name --- src/parser.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/parser.cpp b/src/parser.cpp index 01a3069ff..f4d3dc48d 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -6114,7 +6114,13 @@ gb_internal bool parse_file(Parser *p, AstFile *f) { CommentGroup *docs = f->lead_comment; if (f->curr_token.kind != Token_package) { + ERROR_BLOCK(); syntax_error(f->curr_token, "Expected a package declaration at the beginning of the file"); + // IMPORTANT NOTE(bill): this is technically a race condition with the suggestion, but it's ony a suggession + // so in practice is should be "fine" + if (f->pkg && f->pkg->name != "") { + error_line("\tSuggestion: Add 'package %.*s' to the top of the file\n", LIT(f->pkg->name)); + } return false; } From 36644a3c09630cc75e7826ec443bb760bdbbd4af Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 15 Apr 2024 12:43:45 +0100 Subject: [PATCH 32/66] Add template specialization for compared against `""` with `String` internally --- src/string.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/string.cpp b/src/string.cpp index 7bfa52f33..4adec7a90 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -171,6 +171,9 @@ template gb_internal bool 
operator > (String const &a, char const (&b template gb_internal bool operator <= (String const &a, char const (&b)[N]) { return str_le(a, make_string(cast(u8 *)b, N-1)); } template gb_internal bool operator >= (String const &a, char const (&b)[N]) { return str_ge(a, make_string(cast(u8 *)b, N-1)); } +template <> bool operator == (String const &a, char const (&b)[1]) { return a.len == 0; } +template <> bool operator != (String const &a, char const (&b)[1]) { return a.len != 0; } + gb_internal gb_inline bool string_starts_with(String const &s, String const &prefix) { if (prefix.len > s.len) { return false; From 38c1fd58241ca3da4f539958b4cc10574b641138 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 15 Apr 2024 14:35:51 +0100 Subject: [PATCH 33/66] Keep -vet happy --- core/encoding/cbor/cbor.odin | 4 ++-- core/encoding/cbor/coding.odin | 6 ++++-- core/encoding/cbor/marshal.odin | 13 +++++-------- core/encoding/cbor/unmarshal.odin | 12 ++++++------ 4 files changed, 17 insertions(+), 18 deletions(-) diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index defae4163..550cf87fd 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -428,7 +428,7 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i } } - padding := dedent(padding) + padding = dedent(padding) newline(w, padding) or_return io.write_string(w, "]") or_return @@ -453,7 +453,7 @@ to_diagnostic_format_writer :: proc(w: io.Writer, val: Value, padding := 0) -> i } } - padding := dedent(padding) + padding = dedent(padding) newline(w, padding) or_return io.write_string(w, "}") or_return diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index abb832ccf..11db994da 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -377,6 +377,7 @@ _decode_bytes_ptr :: proc(d: Decoder, add: Add, type: Major = .Bytes) -> (v: ^By _decode_bytes :: proc(d: Decoder, add: Add, type: Major = .Bytes, 
allocator := context.allocator) -> (v: Bytes, err: Decode_Error) { context.allocator = allocator + add := add n, scap := _decode_len_str(d, add) or_return buf := strings.builder_make(0, scap) or_return @@ -385,8 +386,9 @@ _decode_bytes :: proc(d: Decoder, add: Add, type: Major = .Bytes, allocator := c if n == -1 { indefinite_loop: for { - header := _decode_header(d.reader) or_return - maj, add := _header_split(header) + header := _decode_header(d.reader) or_return + maj: Major + maj, add = _header_split(header) #partial switch maj { case type: iter_n, iter_cap := _decode_len_str(d, add) or_return diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index 2ffb6b5b4..87e91bbd8 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -208,7 +208,6 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { } case runtime.Type_Info_Boolean: - val: bool switch b in a { case bool: return _encode_bool(e.writer, b) case b8: return _encode_bool(e.writer, bool(b)) @@ -231,7 +230,7 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { return case runtime.Type_Info_Enumerated_Array: - index := runtime.type_info_base(info.index).variant.(runtime.Type_Info_Enum) + // index := runtime.type_info_base(info.index).variant.(runtime.Type_Info_Enum) err_conv(_encode_u64(e, u64(info.count), .Array)) or_return for i in 0.. 
(err: Marshal_Error) { entries := make([dynamic]Name, 0, n, e.temp_allocator) or_return defer delete(entries) - for name, i in info.names { + for _, i in info.names { fname := field_name(info, i) if fname == "-" { continue @@ -498,7 +497,7 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { marshal_entry(e, info, v, entry.name, entry.field) or_return } } else { - for name, i in info.names { + for _, i in info.names { fname := field_name(info, i) if fname == "-" { continue @@ -514,14 +513,12 @@ marshal_into_encoder :: proc(e: Encoder, v: any) -> (err: Marshal_Error) { case Value: return err_conv(encode(e, vv)) } - tag := reflect.get_union_variant_raw_tag(v) - if v.data == nil || tag <= 0 { + id := reflect.union_variant_typeid(v) + if v.data == nil || id == nil { return _encode_nil(e.writer) } - id := info.variants[tag-1].id if len(info.variants) == 1 { - id := info.variants[tag-1].id return marshal_into(e, any{v.data, id}) } diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 6e7f3c0bb..5480b9125 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -518,7 +518,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header return case reflect.Type_Info_Array: - _length, scap := err_conv(_decode_len_container(d, add)) or_return + _, scap := err_conv(_decode_len_container(d, add)) or_return length := min(scap, t.count) if length > t.count { @@ -532,7 +532,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header return case reflect.Type_Info_Enumerated_Array: - _length, scap := err_conv(_decode_len_container(d, add)) or_return + _, scap := err_conv(_decode_len_container(d, add)) or_return length := min(scap, t.count) if length > t.count { @@ -546,7 +546,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header return case reflect.Type_Info_Complex: - _length, scap := 
err_conv(_decode_len_container(d, add)) or_return + _, scap := err_conv(_decode_len_container(d, add)) or_return length := min(scap, 2) if length > 2 { @@ -568,7 +568,7 @@ _unmarshal_array :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header return case reflect.Type_Info_Quaternion: - _length, scap := err_conv(_decode_len_container(d, add)) or_return + _, scap := err_conv(_decode_len_container(d, add)) or_return length := min(scap, 4) if length > 4 { @@ -628,7 +628,7 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, return _unsupported(v, hdr) } - length, scap := err_conv(_decode_len_container(d, add)) or_return + length, _ := err_conv(_decode_len_container(d, add)) or_return unknown := length == -1 fields := reflect.struct_fields_zipped(ti.id) @@ -672,7 +672,7 @@ _unmarshal_map :: proc(d: Decoder, v: any, ti: ^reflect.Type_Info, hdr: Header, } field := fields[use_field_idx] - name := field.name + // name := field.name ptr := rawptr(uintptr(v.data) + field.offset) fany := any{ptr, field.type.id} _unmarshal_value(d, fany, _decode_header(r) or_return) or_return From 69db9c6390a7ad5e59b478877b745643b25fec1c Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 15 Apr 2024 15:40:32 +0100 Subject: [PATCH 34/66] Add loads of `nil` checks when doing `s.builder` --- core/text/edit/text_edit.odin | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/core/text/edit/text_edit.odin b/core/text/edit/text_edit.odin index caccb6be8..08fce0444 100644 --- a/core/text/edit/text_edit.odin +++ b/core/text/edit/text_edit.odin @@ -92,7 +92,7 @@ begin :: proc(s: ^State, id: u64, builder: ^strings.Builder) { end(s) } s.id = id - s.selection = {len(builder.buf), 0} + s.selection = {len(builder.buf, 0} s.builder = builder update_time(s) undo_clear(s, &s.undo) @@ -137,6 +137,9 @@ clear_all :: proc(s: ^State) -> (cleared: bool) { // push current text state to the wanted undo|redo stack undo_state_push :: 
proc(s: ^State, undo: ^[dynamic]^Undo_State) -> mem.Allocator_Error { + if s.builder != nil { + return nil + } text := string(s.builder.buf[:]) item := (^Undo_State)(mem.alloc(size_of(Undo_State) + len(text), align_of(Undo_State), s.undo_text_allocator) or_return) item.selection = s.selection @@ -154,7 +157,7 @@ undo :: proc(s: ^State, undo, redo: ^[dynamic]^Undo_State) { undo_state_push(s, redo) item := pop(undo) s.selection = item.selection - #no_bounds_check { + #no_bounds_check if s.builder != nil { strings.builder_reset(s.builder) strings.write_string(s.builder, string(item.text[:item.len])) } @@ -224,13 +227,17 @@ input_rune :: proc(s: ^State, r: rune) { // insert a single rune into the edit state - deletes the current selection insert :: proc(s: ^State, at: int, text: string) { undo_check(s) - inject_at(&s.builder.buf, at, text) + if s.builder != nil { + inject_at(&s.builder.buf, at, text) + } } // remove the wanted range withing, usually the selection within byte indices remove :: proc(s: ^State, lo, hi: int) { undo_check(s) - remove_range(&s.builder.buf, lo, hi) + if s.builder != nil { + remove_range(&s.builder.buf, lo, hi) + } } // true if selection head and tail dont match and form a selection of multiple characters @@ -244,8 +251,8 @@ has_selection :: proc(s: ^State) -> bool { sorted_selection :: proc(s: ^State) -> (lo, hi: int) { lo = min(s.selection[0], s.selection[1]) hi = max(s.selection[0], s.selection[1]) - lo = clamp(lo, 0, len(s.builder.buf)) - hi = clamp(hi, 0, len(s.builder.buf)) + lo = clamp(lo, 0, len(s.builder.buf) if s.builder != nil else 0) + hi = clamp(hi, 0, len(s.builder.buf) if s.builder != nil else 0) return } @@ -265,7 +272,10 @@ translate_position :: proc(s: ^State, t: Translation) -> int { return b == ' ' || b == '\t' || b == '\n' } - buf := s.builder.buf[:] + buf: []byte + if s.builder != nil { + buf = s.builder.buf[:] + } pos := clamp(s.selection[0], 0, len(buf)) switch t { @@ -352,7 +362,10 @@ delete_to :: proc(s: ^State, t: 
Translation) { // return the currently selected text current_selected_text :: proc(s: ^State) -> string { lo, hi := sorted_selection(s) - return string(s.builder.buf[lo:hi]) + if s.builder != nil { + return string(s.builder.buf[lo:hi]) + } + return "" } // copy & delete the current selection when copy() succeeds @@ -431,7 +444,7 @@ perform_command :: proc(s: ^State, cmd: Command) { case .Cut: cut(s) case .Copy: copy(s) case .Paste: paste(s) - case .Select_All: s.selection = {len(s.builder.buf), 0} + case .Select_All: s.selection = {len(s.builder.buf) if s.builder != nil else 0, 0} case .Backspace: delete_to(s, .Left) case .Delete: delete_to(s, .Right) case .Delete_Word_Left: delete_to(s, .Word_Left) From 76229cabfaf1f3fd973842b8e11e16c2e4e64896 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 15 Apr 2024 15:44:03 +0100 Subject: [PATCH 35/66] Fix typo --- core/text/edit/text_edit.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/text/edit/text_edit.odin b/core/text/edit/text_edit.odin index 08fce0444..6f21c9860 100644 --- a/core/text/edit/text_edit.odin +++ b/core/text/edit/text_edit.odin @@ -92,7 +92,7 @@ begin :: proc(s: ^State, id: u64, builder: ^strings.Builder) { end(s) } s.id = id - s.selection = {len(builder.buf, 0} + s.selection = {len(builder.buf), 0} s.builder = builder update_time(s) undo_clear(s, &s.undo) From 0729f2b4fb1d03beaae468ab7da8b736b060da3e Mon Sep 17 00:00:00 2001 From: Karl Zylinski Date: Mon, 15 Apr 2024 21:26:30 +0200 Subject: [PATCH 36/66] Fix for dynlib:initialize_symbols not passing -vet-unused --- core/dynlib/lib.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/dynlib/lib.odin b/core/dynlib/lib.odin index 30d55edae..3d41cbe2e 100644 --- a/core/dynlib/lib.odin +++ b/core/dynlib/lib.odin @@ -135,7 +135,7 @@ initialize_symbols :: proc( prefixed_symbol_buf: [2048]u8 = --- count = 0 - for field, i in reflect.struct_fields_zipped(T) { + for field in reflect.struct_fields_zipped(T) { // 
Calculate address of struct member field_ptr := rawptr(uintptr(symbol_table) + field.offset) From 3e449e93dd60d80c89657d42efa85cb3cc4e8cc3 Mon Sep 17 00:00:00 2001 From: Feoramund <161657516+Feoramund@users.noreply.github.com> Date: Mon, 15 Apr 2024 17:07:05 -0400 Subject: [PATCH 37/66] Implement Fisher-Yates shuffle --- core/math/rand/rand.odin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/math/rand/rand.odin b/core/math/rand/rand.odin index 560dc8379..d6a20bd1e 100644 --- a/core/math/rand/rand.odin +++ b/core/math/rand/rand.odin @@ -789,8 +789,8 @@ shuffle :: proc(array: $T/[]$E, r: ^Rand = nil) { return } - for i := i64(0); i < n; i += 1 { - j := int63_max(n, r) + for i := i64(n - 1); i > 0; i -= 1 { + j := int63_max(i + 1, r) array[i], array[j] = array[j], array[i] } } From 8a0f9ae108a75d9ca86b8a91fca2f2423e0a58df Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 16 Apr 2024 13:15:23 +0100 Subject: [PATCH 38/66] Print to string buffer before printing errors --- src/error.cpp | 142 ++++++++++++++++++++++++++++--------------------- src/string.cpp | 37 +++++++++++++ 2 files changed, 119 insertions(+), 60 deletions(-) diff --git a/src/error.cpp b/src/error.cpp index 2e6641e3b..8c9fb265b 100644 --- a/src/error.cpp +++ b/src/error.cpp @@ -7,7 +7,8 @@ struct ErrorValue { ErrorValueKind kind; TokenPos pos; TokenPos end; - Array msgs; + Array msg; + bool seen_newline; }; struct ErrorCollector { @@ -30,19 +31,21 @@ gb_global ErrorCollector global_error_collector; gb_internal void push_error_value(TokenPos const &pos, ErrorValueKind kind = ErrorValue_Error) { GB_ASSERT_MSG(global_error_collector.curr_error_value_set.load() == false, "Possible race condition in error handling system, please report this with an issue"); ErrorValue ev = {kind, pos}; - ev.msgs.allocator = heap_allocator(); + ev.msg.allocator = heap_allocator(); global_error_collector.curr_error_value = ev; global_error_collector.curr_error_value_set.store(true); } gb_internal 
void pop_error_value(void) { + mutex_lock(&global_error_collector.mutex); if (global_error_collector.curr_error_value_set.load()) { array_add(&global_error_collector.error_values, global_error_collector.curr_error_value); global_error_collector.curr_error_value = {}; global_error_collector.curr_error_value_set.store(false); } + mutex_unlock(&global_error_collector.mutex); } @@ -180,9 +183,18 @@ gb_internal ERROR_OUT_PROC(default_error_out_va) { isize n = len-1; if (n > 0) { - String msg = copy_string(permanent_allocator(), {(u8 *)buf, n}); ErrorValue *ev = get_error_value(); - array_add(&ev->msgs, msg); + if (terse_errors()) { + for (isize i = 0; i < n && !ev->seen_newline; i++) { + u8 c = cast(u8)buf[i]; + if (c == '\n') { + ev->seen_newline = true; + } + array_add(&ev->msg, c); + } + } else { + array_add_elems(&ev->msg, (u8 *)buf, n); + } } } @@ -645,109 +657,119 @@ gb_internal int error_value_cmp(void const *a, void const *b) { } gb_internal void print_all_errors(void) { - auto const &escape_char = [](gbFile *f, u8 c) { + auto const &escape_char = [](gbString res, u8 c) -> gbString { switch (c) { - case '\n': gb_file_write(f, "\\n", 2); break; - case '"': gb_file_write(f, "\\\"", 2); break; - case '\\': gb_file_write(f, "\\\\", 2); break; - case '\b': gb_file_write(f, "\\b", 2); break; - case '\f': gb_file_write(f, "\\f", 2); break; - case '\r': gb_file_write(f, "\\r", 2); break; - case '\t': gb_file_write(f, "\\t", 2); break; + case '\n': res = gb_string_append_length(res, "\\n", 2); break; + case '"': res = gb_string_append_length(res, "\\\"", 2); break; + case '\\': res = gb_string_append_length(res, "\\\\", 2); break; + case '\b': res = gb_string_append_length(res, "\\b", 2); break; + case '\f': res = gb_string_append_length(res, "\\f", 2); break; + case '\r': res = gb_string_append_length(res, "\\r", 2); break; + case '\t': res = gb_string_append_length(res, "\\t", 2); break; default: if ('\x00' <= c && c <= '\x1f') { - gb_fprintf(f, "\\u%04x", c); + res = 
gb_string_append_fmt(res, "\\u%04x", c); } else { - gb_file_write(f, &c, 1); + res = gb_string_append_length(res, &c, 1); } break; } + return res; }; GB_ASSERT(any_errors() || any_warnings()); - gbFile *f = gb_file_get_standard(gbFileStandard_Error); + array_sort(global_error_collector.error_values, error_value_cmp); + gbString res = gb_string_make(heap_allocator(), ""); + defer (gb_string_free(res)); if (json_errors()) { - gb_fprintf(f, "{\n"); - gb_fprintf(f, "\t\"error_count\": %td,\n", global_error_collector.error_values.count); - gb_fprintf(f, "\t\"errors\": [\n"); + res = gb_string_append_fmt(res, "{\n"); + res = gb_string_append_fmt(res, "\t\"error_count\": %td,\n", global_error_collector.error_values.count); + res = gb_string_append_fmt(res, "\t\"errors\": [\n"); for_array(i, global_error_collector.error_values) { ErrorValue ev = global_error_collector.error_values[i]; - gb_fprintf(f, "\t\t{\n"); + res = gb_string_append_fmt(res, "\t\t{\n"); - gb_fprintf(f, "\t\t\t\"type\": \""); + res = gb_string_append_fmt(res, "\t\t\t\"type\": \""); if (ev.kind == ErrorValue_Warning) { - gb_fprintf(f, "warning"); + res = gb_string_append_fmt(res, "warning"); } else { - gb_fprintf(f, "error"); + res = gb_string_append_fmt(res, "error"); } - gb_fprintf(f, "\",\n"); + res = gb_string_append_fmt(res, "\",\n"); - gb_fprintf(f, "\t\t\t\"pos\": {\n"); + res = gb_string_append_fmt(res, "\t\t\t\"pos\": {\n"); if (ev.pos.file_id) { - gb_fprintf(f, "\t\t\t\t\"file\": \""); + res = gb_string_append_fmt(res, "\t\t\t\t\"file\": \""); String file = get_file_path_string(ev.pos.file_id); for (isize k = 0; k < file.len; k++) { - escape_char(f, file.text[k]); + res = escape_char(res, file.text[k]); } - gb_fprintf(f, "\",\n"); - gb_fprintf(f, "\t\t\t\t\"offset\": %d,\n", ev.pos.offset); - gb_fprintf(f, "\t\t\t\t\"line\": %d,\n", ev.pos.line); - gb_fprintf(f, "\t\t\t\t\"column\": %d,\n", ev.pos.column); + res = gb_string_append_fmt(res, "\",\n"); + res = gb_string_append_fmt(res, 
"\t\t\t\t\"offset\": %d,\n", ev.pos.offset); + res = gb_string_append_fmt(res, "\t\t\t\t\"line\": %d,\n", ev.pos.line); + res = gb_string_append_fmt(res, "\t\t\t\t\"column\": %d,\n", ev.pos.column); i32 end_column = gb_max(ev.end.column, ev.pos.column); - gb_fprintf(f, "\t\t\t\t\"end_column\": %d\n", end_column); - gb_fprintf(f, "\t\t\t},\n"); + res = gb_string_append_fmt(res, "\t\t\t\t\"end_column\": %d\n", end_column); + res = gb_string_append_fmt(res, "\t\t\t},\n"); } - gb_fprintf(f, "\t\t\t\"msgs\": [\n"); + res = gb_string_append_fmt(res, "\t\t\t\"msgs\": [\n"); - if (ev.msgs.count > 1) { - gb_fprintf(f, "\t\t\t\t\""); + auto lines = split_lines_from_array(ev.msg, heap_allocator()); + defer (array_free(&lines)); - for (isize j = 1; j < ev.msgs.count; j++) { - String msg = ev.msgs[j]; - for (isize k = 0; k < msg.len; k++) { - u8 c = msg.text[k]; - if (c == '\n') { - if (k+1 == msg.len && j+1 == ev.msgs.count) { - // don't do the last one - } else { - gb_fprintf(f, "\",\n"); - gb_fprintf(f, "\t\t\t\t\""); - } - } else { - escape_char(f, c); - } + if (lines.count > 0) { + res = gb_string_append_fmt(res, "\t\t\t\t\""); + + for (isize j = 0; j < lines.count; j++) { + String line = lines[j]; + for (isize k = 0; k < line.len; k++) { + u8 c = line.text[k]; + res = escape_char(res, c); + } + if (j+1 < lines.count) { + res = gb_string_append_fmt(res, "\",\n"); + res = gb_string_append_fmt(res, "\t\t\t\t\""); } } - gb_fprintf(f, "\"\n"); + res = gb_string_append_fmt(res, "\"\n"); } - gb_fprintf(f, "\t\t\t]\n"); - gb_fprintf(f, "\t\t}"); + res = gb_string_append_fmt(res, "\t\t\t]\n"); + res = gb_string_append_fmt(res, "\t\t}"); if (i+1 != global_error_collector.error_values.count) { - gb_fprintf(f, ","); + res = gb_string_append_fmt(res, ","); } - gb_fprintf(f, "\n"); + res = gb_string_append_fmt(res, "\n"); } - gb_fprintf(f, "\t]\n"); - gb_fprintf(f, "}\n"); + res = gb_string_append_fmt(res, "\t]\n"); + res = gb_string_append_fmt(res, "}\n"); } else { for_array(i, 
global_error_collector.error_values) { ErrorValue ev = global_error_collector.error_values[i]; - for (isize j = 0; j < ev.msgs.count; j++) { - String msg = ev.msgs[j]; - gb_file_write(f, msg.text, msg.len); - if (terse_errors() && string_contains_char(msg, '\n')) { + String_Iterator it = {{ev.msg.data, ev.msg.count}, 0}; + + for (isize line_idx = 0; /**/; line_idx++) { + String line = string_split_iterator(&it, '\n'); + if (line.len == 0) { + break; + } + line = string_trim_trailing_whitespace(line); + res = gb_string_append_length(res, line.text, line.len); + res = gb_string_append_length(res, " \n", 2); + if (line_idx == 0 && terse_errors()) { break; } } } } + gbFile *f = gb_file_get_standard(gbFileStandard_Error); + gb_file_write(f, res, gb_string_length(res)); } \ No newline at end of file diff --git a/src/string.cpp b/src/string.cpp index 4adec7a90..3747f4564 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -276,6 +276,43 @@ gb_internal String string_trim_whitespace(String str) { return str; } +gb_internal String string_trim_trailing_whitespace(String str) { + while (str.len > 0) { + u8 c = str[str.len-1]; + if (rune_is_whitespace(c) || c == 0) { + str.len -= 1; + } else { + break; + } + } + return str; +} + +gb_internal String split_lines_first_line_from_array(Array const &array, gbAllocator allocator) { + String_Iterator it = {{array.data, array.count}, 0}; + + String line = string_split_iterator(&it, '\n'); + line = string_trim_trailing_whitespace(line); + return line; +} + +gb_internal Array split_lines_from_array(Array const &array, gbAllocator allocator) { + Array lines = {}; + lines.allocator = allocator; + + String_Iterator it = {{array.data, array.count}, 0}; + + for (;;) { + String line = string_split_iterator(&it, '\n'); + if (line.len == 0) { + break; + } + line = string_trim_trailing_whitespace(line); + array_add(&lines, line); + } + + return lines; +} gb_internal bool string_contains_char(String const &s, u8 c) { isize i; From 
a61ae7c861fa301684ee1582507061317b11426b Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 16 Apr 2024 13:31:49 +0100 Subject: [PATCH 39/66] Fix #3427 --- src/check_type.cpp | 20 ++++++++++++++------ src/checker.hpp | 5 ++++- src/llvm_backend_general.cpp | 2 +- src/llvm_backend_type.cpp | 2 +- src/llvm_backend_utility.cpp | 4 ++-- src/types.cpp | 1 - 6 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/check_type.cpp b/src/check_type.cpp index f4e5d7c96..3bb1a4fd1 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -2495,18 +2495,16 @@ gb_internal Type *get_map_cell_type(Type *type) { return s; } -gb_internal void init_map_internal_types(Type *type) { +gb_internal void init_map_internal_debug_types(Type *type) { GB_ASSERT(type->kind == Type_Map); GB_ASSERT(t_allocator != nullptr); - if (type->Map.lookup_result_type != nullptr) return; + if (type->Map.debug_metadata_type != nullptr) return; Type *key = type->Map.key; Type *value = type->Map.value; GB_ASSERT(key != nullptr); GB_ASSERT(value != nullptr); - - Type *key_cell = get_map_cell_type(key); Type *value_cell = get_map_cell_type(value); @@ -2541,6 +2539,18 @@ gb_internal void init_map_internal_types(Type *type) { gb_unused(type_size_of(debug_type)); type->Map.debug_metadata_type = debug_type; +} + + +gb_internal void init_map_internal_types(Type *type) { + GB_ASSERT(type->kind == Type_Map); + GB_ASSERT(t_allocator != nullptr); + if (type->Map.lookup_result_type != nullptr) return; + + Type *key = type->Map.key; + Type *value = type->Map.value; + GB_ASSERT(key != nullptr); + GB_ASSERT(value != nullptr); type->Map.lookup_result_type = make_optional_ok_type(value); } @@ -2613,8 +2623,6 @@ gb_internal void check_map_type(CheckerContext *ctx, Type *type, Ast *node) { init_core_map_type(ctx->checker); init_map_internal_types(type); - - // error(node, "'map' types are not yet implemented"); } gb_internal void check_matrix_type(CheckerContext *ctx, Type **type, Ast *node) { diff --git 
a/src/checker.hpp b/src/checker.hpp index 1701da58d..2ade9312e 100644 --- a/src/checker.hpp +++ b/src/checker.hpp @@ -563,4 +563,7 @@ gb_internal void init_mem_allocator(Checker *c); gb_internal void add_untyped_expressions(CheckerInfo *cinfo, UntypedExprInfoMap *untyped); -gb_internal GenTypesData *ensure_polymorphic_record_entity_has_gen_types(CheckerContext *ctx, Type *original_type); \ No newline at end of file +gb_internal GenTypesData *ensure_polymorphic_record_entity_has_gen_types(CheckerContext *ctx, Type *original_type); + + +gb_internal void init_map_internal_types(Type *type); \ No newline at end of file diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 73e4a00e6..7a5ed5635 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -2070,7 +2070,7 @@ gb_internal LLVMTypeRef lb_type_internal(lbModule *m, Type *type) { break; case Type_Map: - init_map_internal_types(type); + init_map_internal_debug_types(type); GB_ASSERT(t_raw_map != nullptr); return lb_type_internal(m, t_raw_map); diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 93e2874a5..0bac2f732 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -903,7 +903,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ case Type_Map: { tag_type = t_type_info_map; - init_map_internal_types(t); + init_map_internal_debug_types(t); LLVMValueRef vals[3] = { get_type_info_ptr(m, t->Map.key), diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 865c3f1ec..0d1db2cbf 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -1125,7 +1125,7 @@ gb_internal lbValue lb_emit_struct_ep(lbProcedure *p, lbValue s, i32 index) { case 3: result_type = t_allocator; break; } } else if (is_type_map(t)) { - init_map_internal_types(t); + init_map_internal_debug_types(t); Type *itp = alloc_type_pointer(t_raw_map); s = lb_emit_transmute(p, s, itp); @@ -1264,7 
+1264,7 @@ gb_internal lbValue lb_emit_struct_ev(lbProcedure *p, lbValue s, i32 index) { case Type_Map: { - init_map_internal_types(t); + init_map_internal_debug_types(t); switch (index) { case 0: result_type = get_struct_field_type(t_raw_map, 0); break; case 1: result_type = get_struct_field_type(t_raw_map, 1); break; diff --git a/src/types.cpp b/src/types.cpp index 97512d29b..18cb12ea1 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -769,7 +769,6 @@ gb_internal gbString type_to_string (Type *type, bool shorthand=true); gb_internal gbString type_to_string (Type *type, gbAllocator allocator, bool shorthand=true); gb_internal i64 type_size_of_internal(Type *t, TypePath *path); gb_internal i64 type_align_of_internal(Type *t, TypePath *path); -gb_internal void init_map_internal_types(Type *type); gb_internal Type * bit_set_to_int(Type *t); gb_internal bool are_types_identical(Type *x, Type *y); From 6dcf38b85bc7ab7659671f7ac1d0dfdc5d942163 Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 16 Apr 2024 21:36:54 +0100 Subject: [PATCH 40/66] Correct `copy_from_string` docs --- base/runtime/core_builtin.odin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/runtime/core_builtin.odin b/base/runtime/core_builtin.odin index c5c419de3..00c30d3fd 100644 --- a/base/runtime/core_builtin.odin +++ b/base/runtime/core_builtin.odin @@ -40,7 +40,7 @@ copy_slice :: proc "contextless" (dst, src: $T/[]$E) -> int { } return n } -// `copy_from_string` is a built-in procedure that copies elements from a source slice `src` to a destination string `dst`. +// `copy_from_string` is a built-in procedure that copies elements from a source string `src` to a destination slice `dst`. // The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum // of len(src) and len(dst). 
// @@ -53,7 +53,7 @@ copy_from_string :: proc "contextless" (dst: $T/[]$E/u8, src: $S/string) -> int } return n } -// `copy` is a built-in procedure that copies elements from a source slice `src` to a destination slice/string `dst`. +// `copy` is a built-in procedure that copies elements from a source slice/string `src` to a destination slice `dst`. // The source and destination may overlap. Copy returns the number of elements copied, which will be the minimum // of len(src) and len(dst). @builtin From 7cd2bc26f42237f825274198c5bc68f7633b73b1 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Wed, 17 Apr 2024 13:31:32 +0100 Subject: [PATCH 41/66] Clear error message on collisions with `using` on struct fields --- src/check_type.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/check_type.cpp b/src/check_type.cpp index 3bb1a4fd1..a6dbb8dfc 100644 --- a/src/check_type.cpp +++ b/src/check_type.cpp @@ -29,10 +29,11 @@ gb_internal void populate_using_array_index(CheckerContext *ctx, Ast *node, AstF } } -gb_internal void populate_using_entity_scope(CheckerContext *ctx, Ast *node, AstField *field, Type *t) { +gb_internal void populate_using_entity_scope(CheckerContext *ctx, Ast *node, AstField *field, Type *t, isize level) { if (t == nullptr) { return; } + Type *original_type = t; t = base_type(type_deref(t)); gbString str = nullptr; defer (gb_string_free(str)); @@ -46,16 +47,18 @@ gb_internal void populate_using_entity_scope(CheckerContext *ctx, Ast *node, Ast String name = f->token.string; Entity *e = scope_lookup_current(ctx->scope, name); if (e != nullptr && name != "_") { + gbString ot = type_to_string(original_type); // TODO(bill): Better type error if (str != nullptr) { - error(e->token, "'%.*s' is already declared in '%s'", LIT(name), str); + error(e->token, "'%.*s' is already declared in '%s', through 'using' from '%s'", LIT(name), str, ot); } else { - error(e->token, "'%.*s' is already declared", LIT(name)); + error(e->token, 
"'%.*s' is already declared, through 'using' from '%s'", LIT(name), ot); } + gb_string_free(ot); } else { add_entity(ctx, ctx->scope, nullptr, f); if (f->flags & EntityFlag_Using) { - populate_using_entity_scope(ctx, node, field, f->type); + populate_using_entity_scope(ctx, node, field, f->type, level+1); } } } @@ -200,7 +203,7 @@ gb_internal void check_struct_fields(CheckerContext *ctx, Ast *node, Slicenames.count > 0) { From e296b050eeca33ef44eb71e00532a8f2f971cb01 Mon Sep 17 00:00:00 2001 From: Matias Fernandez Date: Wed, 17 Apr 2024 22:54:30 -0400 Subject: [PATCH 42/66] fix #soa '%#v' formatting --- core/fmt/fmt.odin | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index d3b9d7d69..7f432a6be 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -1900,7 +1900,7 @@ fmt_struct :: proc(fi: ^Info, v: any, the_verb: rune, info: runtime.Type_Info_St // fi.hash = false; fi.indent += 1 - if hash { + if !is_soa && hash { io.write_byte(fi.writer, '\n', &fi.n) } defer { @@ -1934,6 +1934,9 @@ fmt_struct :: proc(fi: ^Info, v: any, the_verb: rune, info: runtime.Type_Info_St n = uintptr((^int)(uintptr(v.data) + info.offsets[actual_field_count])^) } + if hash && n > 0 { + io.write_byte(fi.writer, '\n', &fi.n) + } for index in 0.. 
0 { io.write_string(fi.writer, ", ", &fi.n) } @@ -1942,9 +1945,23 @@ fmt_struct :: proc(fi: ^Info, v: any, the_verb: rune, info: runtime.Type_Info_St if !hash && field_count > 0 { io.write_string(fi.writer, ", ", &fi.n) } + if hash { + fi.indent -= 1 + fmt_write_indent(fi) + fi.indent += 1 + } io.write_string(fi.writer, base_type_name, &fi.n) io.write_byte(fi.writer, '{', &fi.n) - defer io.write_byte(fi.writer, '}', &fi.n) + if hash { io.write_byte(fi.writer, '\n', &fi.n) } + defer { + if hash { + fi.indent -= 1 + fmt_write_indent(fi) + fi.indent += 1 + } + io.write_byte(fi.writer, '}', &fi.n) + if hash { io.write_string(fi.writer, ",\n", &fi.n) } + } fi.record_level += 1 defer fi.record_level -= 1 From 6127339c56e15eeb4282ea385333c98d690091ee Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 10:59:02 +0100 Subject: [PATCH 43/66] Add `#force_no_inline` to many of the runtime print procedures --- base/runtime/print.odin | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/base/runtime/print.odin b/base/runtime/print.odin index c93c2ab49..e8da84f14 100644 --- a/base/runtime/print.odin +++ b/base/runtime/print.odin @@ -6,7 +6,7 @@ _INTEGER_DIGITS :: "0123456789abcdefghijklmnopqrstuvwxyz" _INTEGER_DIGITS_VAR := _INTEGER_DIGITS when !ODIN_NO_RTTI { - print_any_single :: proc "contextless" (arg: any) { + print_any_single :: #force_no_inline proc "contextless" (arg: any) { x := arg if x.data == nil { print_string("nil") @@ -72,7 +72,7 @@ when !ODIN_NO_RTTI { print_string("") } } - println_any :: proc "contextless" (args: ..any) { + println_any :: #force_no_inline proc "contextless" (args: ..any) { context = default_context() loop: for arg, i in args { assert(arg.id != nil) @@ -122,12 +122,12 @@ encode_rune :: proc "contextless" (c: rune) -> ([4]u8, int) { return buf, 4 } -print_string :: proc "contextless" (str: string) -> (n: int) { +print_string :: #force_no_inline proc "contextless" (str: string) -> (n: int) { n, _ 
= stderr_write(transmute([]byte)str) return } -print_strings :: proc "contextless" (args: ..string) -> (n: int) { +print_strings :: #force_no_inline proc "contextless" (args: ..string) -> (n: int) { for str in args { m, err := stderr_write(transmute([]byte)str) n += m @@ -138,12 +138,12 @@ print_strings :: proc "contextless" (args: ..string) -> (n: int) { return } -print_byte :: proc "contextless" (b: byte) -> (n: int) { +print_byte :: #force_no_inline proc "contextless" (b: byte) -> (n: int) { n, _ = stderr_write([]byte{b}) return } -print_encoded_rune :: proc "contextless" (r: rune) { +print_encoded_rune :: #force_no_inline proc "contextless" (r: rune) { print_byte('\'') switch r { @@ -170,7 +170,7 @@ print_encoded_rune :: proc "contextless" (r: rune) { print_byte('\'') } -print_rune :: proc "contextless" (r: rune) -> int #no_bounds_check { +print_rune :: #force_no_inline proc "contextless" (r: rune) -> int #no_bounds_check { RUNE_SELF :: 0x80 if r < RUNE_SELF { @@ -183,7 +183,7 @@ print_rune :: proc "contextless" (r: rune) -> int #no_bounds_check { } -print_u64 :: proc "contextless" (x: u64) #no_bounds_check { +print_u64 :: #force_no_inline proc "contextless" (x: u64) #no_bounds_check { a: [129]byte i := len(a) b := u64(10) @@ -198,7 +198,7 @@ print_u64 :: proc "contextless" (x: u64) #no_bounds_check { } -print_i64 :: proc "contextless" (x: i64) #no_bounds_check { +print_i64 :: #force_no_inline proc "contextless" (x: i64) #no_bounds_check { b :: i64(10) u := x @@ -223,7 +223,7 @@ print_uint :: proc "contextless" (x: uint) { print_u64(u64(x)) } print_uintptr :: proc "contextless" (x: uintptr) { print_u64(u64(x)) } print_int :: proc "contextless" (x: int) { print_i64(i64(x)) } -print_caller_location :: proc "contextless" (loc: Source_Code_Location) { +print_caller_location :: #force_no_inline proc "contextless" (loc: Source_Code_Location) { print_string(loc.file_path) when ODIN_ERROR_POS_STYLE == .Default { print_byte('(') @@ -241,7 +241,7 @@ print_caller_location 
:: proc "contextless" (loc: Source_Code_Location) { #panic("unhandled ODIN_ERROR_POS_STYLE") } } -print_typeid :: proc "contextless" (id: typeid) { +print_typeid :: #force_no_inline proc "contextless" (id: typeid) { when ODIN_NO_RTTI { if id == nil { print_string("nil") @@ -257,7 +257,7 @@ print_typeid :: proc "contextless" (id: typeid) { } } } -print_type :: proc "contextless" (ti: ^Type_Info) { +print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) { if ti == nil { print_string("nil") return From 889cd5461ca3601d5359269345594903bd9ed5ca Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 11:00:47 +0100 Subject: [PATCH 44/66] Add `@(optimization_mode="size")` to `runtime.print_type` --- base/runtime/print.odin | 2 ++ 1 file changed, 2 insertions(+) diff --git a/base/runtime/print.odin b/base/runtime/print.odin index e8da84f14..4e2ffaf80 100644 --- a/base/runtime/print.odin +++ b/base/runtime/print.odin @@ -257,6 +257,8 @@ print_typeid :: #force_no_inline proc "contextless" (id: typeid) { } } } + +@(optimization_mode="size") print_type :: #force_no_inline proc "contextless" (ti: ^Type_Info) { if ti == nil { print_string("nil") From 5c52f3cf2fee63f860914d062555ed87c1dba9d8 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 11:13:44 +0100 Subject: [PATCH 45/66] Add `ODIN_NO_BOUNDS_CHECK` --- base/runtime/error_checks.odin | 14 +++++++++++++- src/checker.cpp | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/base/runtime/error_checks.odin b/base/runtime/error_checks.odin index ea6333c29..742e06a71 100644 --- a/base/runtime/error_checks.odin +++ b/base/runtime/error_checks.odin @@ -19,6 +19,7 @@ type_assertion_trap :: proc "contextless" () -> ! 
{ } +@(disabled=ODIN_NO_BOUNDS_CHECK) bounds_check_error :: proc "contextless" (file: string, line, column: i32, index, count: int) { if uint(index) < uint(count) { return @@ -61,6 +62,7 @@ multi_pointer_slice_handle_error :: proc "contextless" (file: string, line, colu } +@(disabled=ODIN_NO_BOUNDS_CHECK) multi_pointer_slice_expr_error :: proc "contextless" (file: string, line, column: i32, lo, hi: int) { if lo <= hi { return @@ -68,6 +70,7 @@ multi_pointer_slice_expr_error :: proc "contextless" (file: string, line, column multi_pointer_slice_handle_error(file, line, column, lo, hi) } +@(disabled=ODIN_NO_BOUNDS_CHECK) slice_expr_error_hi :: proc "contextless" (file: string, line, column: i32, hi: int, len: int) { if 0 <= hi && hi <= len { return @@ -75,6 +78,7 @@ slice_expr_error_hi :: proc "contextless" (file: string, line, column: i32, hi: slice_handle_error(file, line, column, 0, hi, len) } +@(disabled=ODIN_NO_BOUNDS_CHECK) slice_expr_error_lo_hi :: proc "contextless" (file: string, line, column: i32, lo, hi: int, len: int) { if 0 <= lo && lo <= len && lo <= hi && hi <= len { return @@ -82,6 +86,7 @@ slice_expr_error_lo_hi :: proc "contextless" (file: string, line, column: i32, l slice_handle_error(file, line, column, lo, hi, len) } +@(disabled=ODIN_NO_BOUNDS_CHECK) dynamic_array_expr_error :: proc "contextless" (file: string, line, column: i32, low, high, max: int) { if 0 <= low && low <= high && high <= max { return @@ -102,6 +107,7 @@ dynamic_array_expr_error :: proc "contextless" (file: string, line, column: i32, } +@(disabled=ODIN_NO_BOUNDS_CHECK) matrix_bounds_check_error :: proc "contextless" (file: string, line, column: i32, row_index, column_index, row_count, column_count: int) { if uint(row_index) < uint(row_count) && uint(column_index) < uint(column_count) { @@ -224,6 +230,7 @@ when ODIN_NO_RTTI { } +@(disabled=ODIN_NO_BOUNDS_CHECK) make_slice_error_loc :: #force_inline proc "contextless" (loc := #caller_location, len: int) { if 0 <= len { return @@ 
-239,6 +246,7 @@ make_slice_error_loc :: #force_inline proc "contextless" (loc := #caller_locatio handle_error(loc, len) } +@(disabled=ODIN_NO_BOUNDS_CHECK) make_dynamic_array_error_loc :: #force_inline proc "contextless" (loc := #caller_location, len, cap: int) { if 0 <= len && len <= cap { return @@ -256,6 +264,7 @@ make_dynamic_array_error_loc :: #force_inline proc "contextless" (loc := #caller handle_error(loc, len, cap) } +@(disabled=ODIN_NO_BOUNDS_CHECK) make_map_expr_error_loc :: #force_inline proc "contextless" (loc := #caller_location, cap: int) { if 0 <= cap { return @@ -274,19 +283,22 @@ make_map_expr_error_loc :: #force_inline proc "contextless" (loc := #caller_loca - +@(disabled=ODIN_NO_BOUNDS_CHECK) bounds_check_error_loc :: #force_inline proc "contextless" (loc := #caller_location, index, count: int) { bounds_check_error(loc.file_path, loc.line, loc.column, index, count) } +@(disabled=ODIN_NO_BOUNDS_CHECK) slice_expr_error_hi_loc :: #force_inline proc "contextless" (loc := #caller_location, hi: int, len: int) { slice_expr_error_hi(loc.file_path, loc.line, loc.column, hi, len) } +@(disabled=ODIN_NO_BOUNDS_CHECK) slice_expr_error_lo_hi_loc :: #force_inline proc "contextless" (loc := #caller_location, lo, hi: int, len: int) { slice_expr_error_lo_hi(loc.file_path, loc.line, loc.column, lo, hi, len) } +@(disabled=ODIN_NO_BOUNDS_CHECK) dynamic_array_expr_error_loc :: #force_inline proc "contextless" (loc := #caller_location, low, high, max: int) { dynamic_array_expr_error(loc.file_path, loc.line, loc.column, low, high, max) } diff --git a/src/checker.cpp b/src/checker.cpp index 2b3ca0e9f..e82836b2a 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1111,6 +1111,7 @@ gb_internal void init_universal(void) { add_global_bool_constant("ODIN_DEBUG", bc->ODIN_DEBUG); add_global_bool_constant("ODIN_DISABLE_ASSERT", bc->ODIN_DISABLE_ASSERT); add_global_bool_constant("ODIN_DEFAULT_TO_NIL_ALLOCATOR", bc->ODIN_DEFAULT_TO_NIL_ALLOCATOR); + 
add_global_bool_constant("ODIN_NO_BOUNDS_CHECK", build_context.no_bounds_check); add_global_bool_constant("ODIN_DEFAULT_TO_PANIC_ALLOCATOR", bc->ODIN_DEFAULT_TO_PANIC_ALLOCATOR); add_global_bool_constant("ODIN_NO_DYNAMIC_LITERALS", bc->no_dynamic_literals); add_global_bool_constant("ODIN_NO_CRT", bc->no_crt); From ece78d22d2b549116a0884d3578972b8f389f983 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 11:22:31 +0100 Subject: [PATCH 46/66] Add `-no-type-assert` and `ODIN_NO_TYPE_ASSERT` --- src/build_settings.cpp | 1 + src/checker.cpp | 1 + src/llvm_backend_expr.cpp | 4 +- src/llvm_backend_utility.cpp | 80 +++++++++++++++++++----------------- src/main.cpp | 9 ++++ 5 files changed, 55 insertions(+), 40 deletions(-) diff --git a/src/build_settings.cpp b/src/build_settings.cpp index 106ae8a28..b806adcd6 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -382,6 +382,7 @@ struct BuildContext { bool keep_temp_files; bool ignore_unknown_attributes; bool no_bounds_check; + bool no_type_assert; bool no_dynamic_literals; bool no_output_files; bool no_crt; diff --git a/src/checker.cpp b/src/checker.cpp index e82836b2a..b7fe2b903 100644 --- a/src/checker.cpp +++ b/src/checker.cpp @@ -1112,6 +1112,7 @@ gb_internal void init_universal(void) { add_global_bool_constant("ODIN_DISABLE_ASSERT", bc->ODIN_DISABLE_ASSERT); add_global_bool_constant("ODIN_DEFAULT_TO_NIL_ALLOCATOR", bc->ODIN_DEFAULT_TO_NIL_ALLOCATOR); add_global_bool_constant("ODIN_NO_BOUNDS_CHECK", build_context.no_bounds_check); + add_global_bool_constant("ODIN_NO_TYPE_ASSERT", build_context.no_type_assert); add_global_bool_constant("ODIN_DEFAULT_TO_PANIC_ALLOCATOR", bc->ODIN_DEFAULT_TO_PANIC_ALLOCATOR); add_global_bool_constant("ODIN_NO_DYNAMIC_LITERALS", bc->no_dynamic_literals); add_global_bool_constant("ODIN_NO_CRT", bc->no_crt); diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index ad28f2e5e..edd5daeca 100644 --- a/src/llvm_backend_expr.cpp +++ 
b/src/llvm_backend_expr.cpp @@ -3116,7 +3116,7 @@ gb_internal lbValue lb_build_unary_and(lbProcedure *p, Ast *expr) { Type *dst_type = type; - if ((p->state_flags & StateFlag_no_type_assert) == 0) { + if (!build_context.no_type_assert && (p->state_flags & StateFlag_no_type_assert) == 0) { lbValue src_tag = {}; lbValue dst_tag = {}; if (is_type_union_maybe_pointer(src_type)) { @@ -3156,7 +3156,7 @@ gb_internal lbValue lb_build_unary_and(lbProcedure *p, Ast *expr) { v = lb_emit_load(p, v); } lbValue data_ptr = lb_emit_struct_ev(p, v, 0); - if ((p->state_flags & StateFlag_no_type_assert) == 0) { + if (!build_context.no_type_assert && (p->state_flags & StateFlag_no_type_assert) == 0) { GB_ASSERT(!build_context.no_rtti); lbValue any_id = lb_emit_struct_ev(p, v, 1); diff --git a/src/llvm_backend_utility.cpp b/src/llvm_backend_utility.cpp index 0d1db2cbf..2dd7fbc38 100644 --- a/src/llvm_backend_utility.cpp +++ b/src/llvm_backend_utility.cpp @@ -728,30 +728,32 @@ gb_internal lbValue lb_emit_union_cast(lbProcedure *p, lbValue value, Type *type lb_start_block(p, end_block); if (!is_tuple) { - GB_ASSERT((p->state_flags & StateFlag_no_type_assert) == 0); - // NOTE(bill): Panic on invalid conversion - Type *dst_type = tuple->Tuple.variables[0]->type; + if (!build_context.no_type_assert) { + GB_ASSERT((p->state_flags & StateFlag_no_type_assert) == 0); + // NOTE(bill): Panic on invalid conversion + Type *dst_type = tuple->Tuple.variables[0]->type; - isize arg_count = 7; - if (build_context.no_rtti) { - arg_count = 4; + isize arg_count = 7; + if (build_context.no_rtti) { + arg_count = 4; + } + + lbValue ok = lb_emit_load(p, lb_emit_struct_ep(p, v.addr, 1)); + auto args = array_make(permanent_allocator(), arg_count); + args[0] = ok; + + args[1] = lb_const_string(m, get_file_path_string(pos.file_id)); + args[2] = lb_const_int(m, t_i32, pos.line); + args[3] = lb_const_int(m, t_i32, pos.column); + + if (!build_context.no_rtti) { + args[4] = lb_typeid(m, src_type); + args[5] = 
lb_typeid(m, dst_type); + args[6] = lb_emit_conv(p, value_, t_rawptr); + } + lb_emit_runtime_call(p, "type_assertion_check2", args); } - lbValue ok = lb_emit_load(p, lb_emit_struct_ep(p, v.addr, 1)); - auto args = array_make(permanent_allocator(), arg_count); - args[0] = ok; - - args[1] = lb_const_string(m, get_file_path_string(pos.file_id)); - args[2] = lb_const_int(m, t_i32, pos.line); - args[3] = lb_const_int(m, t_i32, pos.column); - - if (!build_context.no_rtti) { - args[4] = lb_typeid(m, src_type); - args[5] = lb_typeid(m, dst_type); - args[6] = lb_emit_conv(p, value_, t_rawptr); - } - lb_emit_runtime_call(p, "type_assertion_check2", args); - return lb_emit_load(p, lb_emit_struct_ep(p, v.addr, 0)); } return lb_addr_load(p, v); @@ -806,25 +808,27 @@ gb_internal lbAddr lb_emit_any_cast_addr(lbProcedure *p, lbValue value, Type *ty if (!is_tuple) { // NOTE(bill): Panic on invalid conversion - lbValue ok = lb_emit_load(p, lb_emit_struct_ep(p, v.addr, 1)); + if (!build_context.no_type_assert) { + lbValue ok = lb_emit_load(p, lb_emit_struct_ep(p, v.addr, 1)); - isize arg_count = 7; - if (build_context.no_rtti) { - arg_count = 4; + isize arg_count = 7; + if (build_context.no_rtti) { + arg_count = 4; + } + auto args = array_make(permanent_allocator(), arg_count); + args[0] = ok; + + args[1] = lb_const_string(m, get_file_path_string(pos.file_id)); + args[2] = lb_const_int(m, t_i32, pos.line); + args[3] = lb_const_int(m, t_i32, pos.column); + + if (!build_context.no_rtti) { + args[4] = any_typeid; + args[5] = dst_typeid; + args[6] = lb_emit_struct_ev(p, value, 0); + } + lb_emit_runtime_call(p, "type_assertion_check2", args); } - auto args = array_make(permanent_allocator(), arg_count); - args[0] = ok; - - args[1] = lb_const_string(m, get_file_path_string(pos.file_id)); - args[2] = lb_const_int(m, t_i32, pos.line); - args[3] = lb_const_int(m, t_i32, pos.column); - - if (!build_context.no_rtti) { - args[4] = any_typeid; - args[5] = dst_typeid; - args[6] = 
lb_emit_struct_ev(p, value, 0); - } - lb_emit_runtime_call(p, "type_assertion_check2", args); return lb_addr(lb_emit_struct_ep(p, v.addr, 0)); } diff --git a/src/main.cpp b/src/main.cpp index 063b6c8b3..53103ce3a 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -243,6 +243,7 @@ enum BuildFlagKind { BuildFlag_Debug, BuildFlag_DisableAssert, BuildFlag_NoBoundsCheck, + BuildFlag_NoTypeAssert, BuildFlag_NoDynamicLiterals, BuildFlag_NoCRT, BuildFlag_NoEntryPoint, @@ -436,6 +437,7 @@ gb_internal bool parse_build_flags(Array args) { add_flag(&build_flags, BuildFlag_Debug, str_lit("debug"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_DisableAssert, str_lit("disable-assert"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_NoBoundsCheck, str_lit("no-bounds-check"), BuildFlagParam_None, Command__does_check); + add_flag(&build_flags, BuildFlag_NoTypeAssert, str_lit("no-type-assert"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_NoThreadLocal, str_lit("no-thread-local"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_NoDynamicLiterals, str_lit("no-dynamic-literals"), BuildFlagParam_None, Command__does_check); add_flag(&build_flags, BuildFlag_NoCRT, str_lit("no-crt"), BuildFlagParam_None, Command__does_build); @@ -1013,6 +1015,9 @@ gb_internal bool parse_build_flags(Array args) { case BuildFlag_NoBoundsCheck: build_context.no_bounds_check = true; break; + case BuildFlag_NoTypeAssert: + build_context.no_type_assert = true; + break; case BuildFlag_NoDynamicLiterals: build_context.no_dynamic_literals = true; break; @@ -1850,6 +1855,10 @@ gb_internal void print_show_help(String const arg0, String const &command) { print_usage_line(2, "Disables bounds checking program wide."); print_usage_line(0, ""); + print_usage_line(1, "-no-type-assert"); + print_usage_line(2, "Disables type assertion checking program wide."); + print_usage_line(0, ""); + print_usage_line(1, 
"-no-crt"); print_usage_line(2, "Disables automatic linking with the C Run Time."); print_usage_line(0, ""); From aad41fc76210814f4bf708927a10d17de0e2621d Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 11:27:42 +0100 Subject: [PATCH 47/66] Fix #3445 --- src/llvm_backend_debug.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp index 511ff0475..b430cf894 100644 --- a/src/llvm_backend_debug.cpp +++ b/src/llvm_backend_debug.cpp @@ -739,6 +739,7 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) { } case Type_Map: { + init_map_internal_debug_types(bt); Type *bt = base_type(type->Map.debug_metadata_type); GB_ASSERT(bt->kind == Type_Struct); @@ -945,6 +946,7 @@ gb_internal LLVMMetadataRef lb_debug_type(lbModule *m, Type *type) { } case Type_Map: { + init_map_internal_debug_types(bt); bt = base_type(bt->Map.debug_metadata_type); GB_ASSERT(bt->kind == Type_Struct); return lb_debug_struct(m, type, bt, name, scope, file, line); From f84a09297795bd5c6d00305e0ad32dd696fe36a7 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 11:28:46 +0100 Subject: [PATCH 48/66] Fix typo. 
--- src/llvm_backend_debug.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp index b430cf894..853941496 100644 --- a/src/llvm_backend_debug.cpp +++ b/src/llvm_backend_debug.cpp @@ -739,7 +739,7 @@ gb_internal LLVMMetadataRef lb_debug_type_internal(lbModule *m, Type *type) { } case Type_Map: { - init_map_internal_debug_types(bt); + init_map_internal_debug_types(type); Type *bt = base_type(type->Map.debug_metadata_type); GB_ASSERT(bt->kind == Type_Struct); From 334e08c750a1eac3042b867c0e460b7bc516743e Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 12:17:39 +0100 Subject: [PATCH 49/66] Update numerous package declaration names --- core/compress/gzip/example.odin | 2 +- core/compress/gzip/gzip.odin | 2 +- core/compress/shoco/model.odin | 2 +- core/compress/shoco/shoco.odin | 2 +- core/compress/zlib/example.odin | 2 +- core/compress/zlib/zlib.odin | 2 +- core/container/bit_array/bit_array.odin | 2 +- core/container/bit_array/doc.odin | 2 +- core/encoding/base32/base32.odin | 2 +- core/encoding/base64/base64.odin | 2 +- core/encoding/cbor/cbor.odin | 2 +- core/encoding/cbor/coding.odin | 2 +- core/encoding/cbor/doc.odin | 2 +- core/encoding/cbor/marshal.odin | 2 +- core/encoding/cbor/tags.odin | 2 +- core/encoding/cbor/unmarshal.odin | 2 +- core/encoding/csv/reader.odin | 2 +- core/encoding/csv/writer.odin | 2 +- core/encoding/entity/entity.odin | 2 +- core/encoding/entity/generated.odin | 2 +- core/encoding/hex/hex.odin | 2 +- core/encoding/json/marshal.odin | 2 +- core/encoding/json/parser.odin | 2 +- core/encoding/json/tokenizer.odin | 2 +- core/encoding/json/types.odin | 2 +- core/encoding/json/unmarshal.odin | 2 +- core/encoding/json/validator.odin | 2 +- core/encoding/varint/doc.odin | 2 +- core/encoding/varint/leb128.odin | 2 +- core/encoding/xml/debug_print.odin | 2 +- core/encoding/xml/helpers.odin | 2 +- core/encoding/xml/tokenizer.odin | 2 +- 
core/encoding/xml/xml_reader.odin | 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) diff --git a/core/compress/gzip/example.odin b/core/compress/gzip/example.odin index 635134e40..09540aafc 100644 --- a/core/compress/gzip/example.odin +++ b/core/compress/gzip/example.odin @@ -1,5 +1,5 @@ //+build ignore -package gzip +package compress_gzip /* Copyright 2021 Jeroen van Rijn . diff --git a/core/compress/gzip/gzip.odin b/core/compress/gzip/gzip.odin index 50945fc77..57ed3c3c5 100644 --- a/core/compress/gzip/gzip.odin +++ b/core/compress/gzip/gzip.odin @@ -1,4 +1,4 @@ -package gzip +package compress_gzip /* Copyright 2021 Jeroen van Rijn . diff --git a/core/compress/shoco/model.odin b/core/compress/shoco/model.odin index bbc38903d..f62236c00 100644 --- a/core/compress/shoco/model.odin +++ b/core/compress/shoco/model.odin @@ -5,7 +5,7 @@ */ // package shoco is an implementation of the shoco short string compressor -package shoco +package compress_shoco DEFAULT_MODEL :: Shoco_Model { min_char = 39, diff --git a/core/compress/shoco/shoco.odin b/core/compress/shoco/shoco.odin index e65acb0bc..3c1f412ba 100644 --- a/core/compress/shoco/shoco.odin +++ b/core/compress/shoco/shoco.odin @@ -9,7 +9,7 @@ */ // package shoco is an implementation of the shoco short string compressor -package shoco +package compress_shoco import "base:intrinsics" import "core:compress" diff --git a/core/compress/zlib/example.odin b/core/compress/zlib/example.odin index 19017b279..fedd6671d 100644 --- a/core/compress/zlib/example.odin +++ b/core/compress/zlib/example.odin @@ -1,5 +1,5 @@ //+build ignore -package zlib +package compress_zlib /* Copyright 2021 Jeroen van Rijn . diff --git a/core/compress/zlib/zlib.odin b/core/compress/zlib/zlib.odin index d4dc6e3d7..b7f381f2b 100644 --- a/core/compress/zlib/zlib.odin +++ b/core/compress/zlib/zlib.odin @@ -1,5 +1,5 @@ //+vet !using-param -package zlib +package compress_zlib /* Copyright 2021 Jeroen van Rijn . 
diff --git a/core/container/bit_array/bit_array.odin b/core/container/bit_array/bit_array.odin index dbd2e0d3a..a8720715c 100644 --- a/core/container/bit_array/bit_array.odin +++ b/core/container/bit_array/bit_array.odin @@ -1,4 +1,4 @@ -package dynamic_bit_array +package container_dynamic_bit_array import "base:intrinsics" import "core:mem" diff --git a/core/container/bit_array/doc.odin b/core/container/bit_array/doc.odin index 371f63f0e..77e1904a8 100644 --- a/core/container/bit_array/doc.odin +++ b/core/container/bit_array/doc.odin @@ -49,4 +49,4 @@ The Bit Array can be used in several ways: fmt.printf("Freed.\n") } */ -package dynamic_bit_array +package container_dynamic_bit_array diff --git a/core/encoding/base32/base32.odin b/core/encoding/base32/base32.odin index 7ab35afd0..962a3ead4 100644 --- a/core/encoding/base32/base32.odin +++ b/core/encoding/base32/base32.odin @@ -1,4 +1,4 @@ -package base32 +package encoding_base32 // @note(zh): Encoding utility for Base32 // A secondary param can be used to supply a custom alphabet to diff --git a/core/encoding/base64/base64.odin b/core/encoding/base64/base64.odin index 535d457d5..1013a7d0b 100644 --- a/core/encoding/base64/base64.odin +++ b/core/encoding/base64/base64.odin @@ -1,4 +1,4 @@ -package base64 +package encoding_base64 import "core:io" import "core:mem" diff --git a/core/encoding/cbor/cbor.odin b/core/encoding/cbor/cbor.odin index 550cf87fd..d0e406ab1 100644 --- a/core/encoding/cbor/cbor.odin +++ b/core/encoding/cbor/cbor.odin @@ -1,4 +1,4 @@ -package cbor +package encoding_cbor import "base:intrinsics" diff --git a/core/encoding/cbor/coding.odin b/core/encoding/cbor/coding.odin index 11db994da..0d276a7a1 100644 --- a/core/encoding/cbor/coding.odin +++ b/core/encoding/cbor/coding.odin @@ -1,4 +1,4 @@ -package cbor +package encoding_cbor import "base:intrinsics" import "base:runtime" diff --git a/core/encoding/cbor/doc.odin b/core/encoding/cbor/doc.odin index 77eac51cb..937b1b61b 100644 --- 
a/core/encoding/cbor/doc.odin +++ b/core/encoding/cbor/doc.odin @@ -137,5 +137,5 @@ Output: "str": "Hello, World!" } */ -package cbor +package encoding_cbor diff --git a/core/encoding/cbor/marshal.odin b/core/encoding/cbor/marshal.odin index 87e91bbd8..37c9dd180 100644 --- a/core/encoding/cbor/marshal.odin +++ b/core/encoding/cbor/marshal.odin @@ -1,4 +1,4 @@ -package cbor +package encoding_cbor import "base:intrinsics" import "base:runtime" diff --git a/core/encoding/cbor/tags.odin b/core/encoding/cbor/tags.odin index 040ce2458..3dc79a5dd 100644 --- a/core/encoding/cbor/tags.odin +++ b/core/encoding/cbor/tags.odin @@ -1,4 +1,4 @@ -package cbor +package encoding_cbor import "base:runtime" diff --git a/core/encoding/cbor/unmarshal.odin b/core/encoding/cbor/unmarshal.odin index 5480b9125..a1524d9f4 100644 --- a/core/encoding/cbor/unmarshal.odin +++ b/core/encoding/cbor/unmarshal.odin @@ -1,4 +1,4 @@ -package cbor +package encoding_cbor import "base:intrinsics" import "base:runtime" diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin index 44a9fdcc4..22eea9568 100644 --- a/core/encoding/csv/reader.odin +++ b/core/encoding/csv/reader.odin @@ -1,6 +1,6 @@ // package csv reads and writes comma-separated values (CSV) files. 
// This package supports the format described in RFC 4180 -package csv +package encoding_csv import "core:bufio" import "core:bytes" diff --git a/core/encoding/csv/writer.odin b/core/encoding/csv/writer.odin index d519104f2..46145ecc1 100644 --- a/core/encoding/csv/writer.odin +++ b/core/encoding/csv/writer.odin @@ -1,4 +1,4 @@ -package csv +package encoding_csv import "core:io" import "core:strings" diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin index ec640c69f..cee6230ef 100644 --- a/core/encoding/entity/entity.odin +++ b/core/encoding/entity/entity.odin @@ -1,4 +1,4 @@ -package unicode_entity +package encoding_unicode_entity /* A unicode entity encoder/decoder diff --git a/core/encoding/entity/generated.odin b/core/encoding/entity/generated.odin index 3d1c02513..d2acde20d 100644 --- a/core/encoding/entity/generated.odin +++ b/core/encoding/entity/generated.odin @@ -1,4 +1,4 @@ -package unicode_entity +package encoding_unicode_entity /* ------ GENERATED ------ DO NOT EDIT ------ GENERATED ------ DO NOT EDIT ------ GENERATED ------ diff --git a/core/encoding/hex/hex.odin b/core/encoding/hex/hex.odin index ef0bab1d0..dbffe216b 100644 --- a/core/encoding/hex/hex.odin +++ b/core/encoding/hex/hex.odin @@ -1,4 +1,4 @@ -package hex +package encoding_hex import "core:strings" diff --git a/core/encoding/json/marshal.odin b/core/encoding/json/marshal.odin index 3d57316b3..04ef6d434 100644 --- a/core/encoding/json/marshal.odin +++ b/core/encoding/json/marshal.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:mem" import "core:math/bits" diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin index 8bcef1339..3973725dc 100644 --- a/core/encoding/json/parser.odin +++ b/core/encoding/json/parser.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:mem" import "core:unicode/utf8" diff --git a/core/encoding/json/tokenizer.odin b/core/encoding/json/tokenizer.odin index 
a406a73a5..5c20a2cc3 100644 --- a/core/encoding/json/tokenizer.odin +++ b/core/encoding/json/tokenizer.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:unicode/utf8" diff --git a/core/encoding/json/types.odin b/core/encoding/json/types.odin index 20c806236..73e183615 100644 --- a/core/encoding/json/types.odin +++ b/core/encoding/json/types.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:strings" diff --git a/core/encoding/json/unmarshal.odin b/core/encoding/json/unmarshal.odin index b2052e43c..691303521 100644 --- a/core/encoding/json/unmarshal.odin +++ b/core/encoding/json/unmarshal.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:mem" import "core:math" diff --git a/core/encoding/json/validator.odin b/core/encoding/json/validator.odin index 961c2dc23..a6873319d 100644 --- a/core/encoding/json/validator.odin +++ b/core/encoding/json/validator.odin @@ -1,4 +1,4 @@ -package json +package encoding_json import "core:mem" diff --git a/core/encoding/varint/doc.odin b/core/encoding/varint/doc.odin index 5e4708a59..c0a09873c 100644 --- a/core/encoding/varint/doc.odin +++ b/core/encoding/varint/doc.odin @@ -25,4 +25,4 @@ ``` */ -package varint \ No newline at end of file +package encoding_varint \ No newline at end of file diff --git a/core/encoding/varint/leb128.odin b/core/encoding/varint/leb128.odin index 1cdbb81b0..ca6513f04 100644 --- a/core/encoding/varint/leb128.odin +++ b/core/encoding/varint/leb128.odin @@ -8,7 +8,7 @@ // package varint implements variable length integer encoding and decoding using // the LEB128 format as used by DWARF debug info, Android .dex and other file formats. -package varint +package encoding_varint // In theory we should use the bigint package. In practice, varints bigger than this indicate a corrupted file. 
// Instead we'll set limits on the values we'll encode/decode diff --git a/core/encoding/xml/debug_print.odin b/core/encoding/xml/debug_print.odin index 2607bec23..be958baaa 100644 --- a/core/encoding/xml/debug_print.odin +++ b/core/encoding/xml/debug_print.odin @@ -1,4 +1,4 @@ -package xml +package encoding_xml /* An XML 1.0 / 1.1 parser diff --git a/core/encoding/xml/helpers.odin b/core/encoding/xml/helpers.odin index 42a5258b3..a9d4ad493 100644 --- a/core/encoding/xml/helpers.odin +++ b/core/encoding/xml/helpers.odin @@ -1,4 +1,4 @@ -package xml +package encoding_xml /* An XML 1.0 / 1.1 parser diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin index a223a75d6..0f87c366b 100644 --- a/core/encoding/xml/tokenizer.odin +++ b/core/encoding/xml/tokenizer.odin @@ -1,4 +1,4 @@ -package xml +package encoding_xml /* An XML 1.0 / 1.1 parser diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index bf8646bc3..5b4b12948 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -24,7 +24,7 @@ MAYBE: List of contributors: - Jeroen van Rijn: Initial implementation. 
*/ -package xml +package encoding_xml // An XML 1.0 / 1.1 parser import "core:bytes" From 689982a38dae7d6991eeca47585cca06d562d6c4 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 12:22:41 +0100 Subject: [PATCH 50/66] Force runtime type table to be in rodata/rdata section --- src/llvm_backend_type.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 0bac2f732..588768b1a 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -1103,6 +1103,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ LLVMValueRef giant_const = LLVMConstArray(lb_type(m, t_type_info_ptr), giant_const_values, cast(unsigned)global_type_info_data_entity_count); LLVMValueRef giant_array = lb_global_type_info_data_ptr(m).value; LLVMSetInitializer(giant_array, giant_const); + LLVMSetGlobalConstant(giant_array, true); } @@ -1132,4 +1133,7 @@ gb_internal void lb_setup_type_info_data(lbModule *m) { // NOTE(bill): Setup typ LLVMValueRef slice = llvm_const_slice_internal(m, data, len); LLVMSetInitializer(global_type_table.value, slice); + + // force it to be constant + LLVMSetGlobalConstant(global_type_table.value, true); } From b72d49ceb5e3b0010d933a7cd370b8d7e1502561 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 12:41:05 +0100 Subject: [PATCH 51/66] Set linkage to private for `__$type_info_data` --- src/llvm_backend_type.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 588768b1a..78e6af852 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -1104,6 +1104,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ LLVMValueRef giant_array = lb_global_type_info_data_ptr(m).value; LLVMSetInitializer(giant_array, giant_const); LLVMSetGlobalConstant(giant_array, true); + LLVMSetLinkage(giant_array, LLVMLinkerPrivateLinkage); } From 
5200e3fe7a0eff6ecc76838e20ad33762ba08d5d Mon Sep 17 00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 12:45:20 +0100 Subject: [PATCH 52/66] Set `__$ti-` stuff to be private linkage --- src/llvm_backend_general.cpp | 2 +- src/llvm_backend_type.cpp | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/llvm_backend_general.cpp b/src/llvm_backend_general.cpp index 7a5ed5635..da69f94d7 100644 --- a/src/llvm_backend_general.cpp +++ b/src/llvm_backend_general.cpp @@ -211,7 +211,7 @@ gb_internal void lb_loop_end(lbProcedure *p, lbLoopData const &data) { gb_internal void lb_make_global_private_const(LLVMValueRef global_data) { - LLVMSetLinkage(global_data, LLVMPrivateLinkage); + LLVMSetLinkage(global_data, LLVMLinkerPrivateLinkage); LLVMSetUnnamedAddress(global_data, LLVMGlobalUnnamedAddr); LLVMSetGlobalConstant(global_data, true); } diff --git a/src/llvm_backend_type.cpp b/src/llvm_backend_type.cpp index 78e6af852..e202a59ba 100644 --- a/src/llvm_backend_type.cpp +++ b/src/llvm_backend_type.cpp @@ -249,9 +249,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ char name[64] = {}; gb_snprintf(name, 63, "__$ti-%lld", cast(long long)index); LLVMValueRef g = LLVMAddGlobal(m->mod, type, name); - LLVMSetLinkage(g, LLVMInternalLinkage); - LLVMSetUnnamedAddress(g, LLVMGlobalUnnamedAddr); - LLVMSetGlobalConstant(g, true); + lb_make_global_private_const(g); return g; }; @@ -1103,8 +1101,7 @@ gb_internal void lb_setup_type_info_data_giant_array(lbModule *m, i64 global_typ LLVMValueRef giant_const = LLVMConstArray(lb_type(m, t_type_info_ptr), giant_const_values, cast(unsigned)global_type_info_data_entity_count); LLVMValueRef giant_array = lb_global_type_info_data_ptr(m).value; LLVMSetInitializer(giant_array, giant_const); - LLVMSetGlobalConstant(giant_array, true); - LLVMSetLinkage(giant_array, LLVMLinkerPrivateLinkage); + lb_make_global_private_const(giant_array); } From 2416380f34f26bb2ccf45f5ca075293a3e07af19 Mon Sep 17 
00:00:00 2001 From: gingerBill Date: Thu, 18 Apr 2024 12:56:18 +0100 Subject: [PATCH 53/66] Enforce as global constant --- src/llvm_backend.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 645a091b0..4b94cf020 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -2659,7 +2659,7 @@ gb_internal bool lb_generate_code(lbGenerator *gen) { LLVMSetInitializer(g, LLVMConstNull(internal_llvm_type)); LLVMSetLinkage(g, USE_SEPARATE_MODULES ? LLVMExternalLinkage : LLVMInternalLinkage); LLVMSetUnnamedAddress(g, LLVMGlobalUnnamedAddr); - LLVMSetGlobalConstant(g, /*true*/false); + LLVMSetGlobalConstant(g, true); lbValue value = {}; value.value = g; From 3812d5e002fd2a2f4762b7732c72e49c1c6ee767 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Fri, 19 Apr 2024 00:19:02 +0100 Subject: [PATCH 54/66] Only override the comma value on `*_init` if it is "invalid" --- core/encoding/csv/reader.odin | 5 ++++- core/encoding/csv/writer.odin | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin index 22eea9568..5d3626b9f 100644 --- a/core/encoding/csv/reader.odin +++ b/core/encoding/csv/reader.odin @@ -91,7 +91,10 @@ DEFAULT_RECORD_BUFFER_CAPACITY :: 256 // reader_init initializes a new Reader from r reader_init :: proc(reader: ^Reader, r: io.Reader, buffer_allocator := context.allocator) { - reader.comma = ',' + switch reader.comma { + case '\x00', '\n', '\r', 0xfffd: + reader.comma = ',' + } context.allocator = buffer_allocator reserve(&reader.record_buffer, DEFAULT_RECORD_BUFFER_CAPACITY) diff --git a/core/encoding/csv/writer.odin b/core/encoding/csv/writer.odin index 46145ecc1..132fa0a51 100644 --- a/core/encoding/csv/writer.odin +++ b/core/encoding/csv/writer.odin @@ -17,7 +17,10 @@ Writer :: struct { // writer_init initializes a Writer that writes to w writer_init :: proc(writer: ^Writer, w: io.Writer) { - writer.comma = ',' + 
switch writer.comma { + case '\x00', '\n', '\r', 0xfffd: + writer.comma = ',' + } writer.w = w } From 20223345a4376c6490736ca952427b919c178985 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Fri, 19 Apr 2024 00:33:31 +0100 Subject: [PATCH 55/66] Return partial reads --- core/encoding/csv/reader.odin | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin index 5d3626b9f..f8c72c423 100644 --- a/core/encoding/csv/reader.odin +++ b/core/encoding/csv/reader.odin @@ -124,6 +124,7 @@ reader_destroy :: proc(r: ^Reader) { // read reads a single record (a slice of fields) from r // // All \r\n sequences are normalized to \n, including multi-line field +@(require_results) read :: proc(r: ^Reader, allocator := context.allocator) -> (record: []string, err: Error) { if r.reuse_record { record, err = _read_record(r, &r.last_record, allocator) @@ -136,6 +137,7 @@ read :: proc(r: ^Reader, allocator := context.allocator) -> (record: []string, e } // is_io_error checks where an Error is a specific io.Error kind +@(require_results) is_io_error :: proc(err: Error, io_err: io.Error) -> bool { if v, ok := err.(io.Error); ok { return v == io_err @@ -143,10 +145,10 @@ is_io_error :: proc(err: Error, io_err: io.Error) -> bool { return false } - // read_all reads all the remaining records from r. // Each record is a slice of fields. 
// read_all is defined to read until an EOF, and does not treat, and does not treat EOF as an error +@(require_results) read_all :: proc(r: ^Reader, allocator := context.allocator) -> ([][]string, Error) { context.allocator = allocator records: [dynamic][]string @@ -156,13 +158,18 @@ read_all :: proc(r: ^Reader, allocator := context.allocator) -> ([][]string, Err return records[:], nil } if rerr != nil { - return nil, rerr + // allow for a partial read + if record != nil { + append(&records, record) + } + return records[:], rerr } append(&records, record) } } // read reads a single record (a slice of fields) from the provided input. +@(require_results) read_from_string :: proc(input: string, record_allocator := context.allocator, buffer_allocator := context.allocator) -> (record: []string, n: int, err: Error) { ir: strings.Reader strings.reader_init(&ir, input) @@ -178,6 +185,7 @@ read_from_string :: proc(input: string, record_allocator := context.allocator, b // read_all reads all the remaining records from the provided input. 
+@(require_results) read_all_from_string :: proc(input: string, records_allocator := context.allocator, buffer_allocator := context.allocator) -> ([][]string, Error) { ir: strings.Reader strings.reader_init(&ir, input) @@ -189,7 +197,7 @@ read_all_from_string :: proc(input: string, records_allocator := context.allocat return read_all(&r, records_allocator) } -@private +@(private, require_results) is_valid_delim :: proc(r: rune) -> bool { switch r { case 0, '"', '\r', '\n', utf8.RUNE_ERROR: @@ -198,8 +206,9 @@ is_valid_delim :: proc(r: rune) -> bool { return utf8.valid_rune(r) } -@private +@(private, require_results) _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.allocator) -> ([]string, Error) { + @(require_results) read_line :: proc(r: ^Reader) -> ([]byte, io.Error) { if !r.multiline_fields { line, err := bufio.reader_read_slice(&r.r, '\n') @@ -269,6 +278,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all unreachable() } + @(require_results) length_newline :: proc(b: []byte) -> int { if len(b) > 0 && b[len(b)-1] == '\n' { return 1 @@ -276,6 +286,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all return 0 } + @(require_results) next_rune :: proc(b: []byte) -> rune { r, _ := utf8.decode_rune(b) return r From ec7e75a57fc6376242089c7747203ac630c9bc1a Mon Sep 17 00:00:00 2001 From: "Maurizio M. Gavioli" Date: Fri, 19 Apr 2024 08:23:28 +0200 Subject: [PATCH 56/66] Fix #3451 - `core:text/i18n` default `number` value in `get_*_section` proc. In the two procs `get_single_section()` and `get_by_section()` the `number` parameter should have a default of `1` rather than `0`. See the issue for more details. 
--- core/text/i18n/i18n.odin | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/text/i18n/i18n.odin b/core/text/i18n/i18n.odin index 151f9e129..64593c4e8 100644 --- a/core/text/i18n/i18n.odin +++ b/core/text/i18n/i18n.odin @@ -90,7 +90,7 @@ DEFAULT_PARSE_OPTIONS :: Parse_Options{ - get(key, number), which returns the appropriate plural from the active catalog, or - get(key, number, catalog) to grab text from a specific one. */ -get_single_section :: proc(key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) { +get_single_section :: proc(key: string, number := 1, catalog: ^Translation = ACTIVE) -> (value: string) { /* A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule. */ @@ -108,7 +108,7 @@ get_single_section :: proc(key: string, number := 0, catalog: ^Translation = ACT - get(section, key, number), which returns the appropriate plural from the active catalog, or - get(section, key, number, catalog) to grab text from a specific one. */ -get_by_section :: proc(section, key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) { +get_by_section :: proc(section, key: string, number := 1, catalog: ^Translation = ACTIVE) -> (value: string) { /* A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule. */ From c44f618b7dec82cf80609fd613c93ef91cf6a7ae Mon Sep 17 00:00:00 2001 From: Rickard Andersson Date: Fri, 19 Apr 2024 15:17:21 +0300 Subject: [PATCH 57/66] fix(net): add `NOSIGNAL` to `send` options This is a better default than not having it, since it turns errors that would be signals into error values instead. We could take these as options but given that we currently don't I think this at the very least improves on the status quo. 
--- core/net/errors_linux.odin | 1 + core/net/socket_linux.odin | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/core/net/errors_linux.odin b/core/net/errors_linux.odin index 2370dd0d8..5e2c52aea 100644 --- a/core/net/errors_linux.odin +++ b/core/net/errors_linux.odin @@ -136,6 +136,7 @@ TCP_Send_Error :: enum c.int { Interrupted = c.int(linux.Errno.EINTR), // A signal occurred before any data was transmitted. See signal(7). Timeout = c.int(linux.Errno.EWOULDBLOCK), // The send timeout duration passed before all data was sent. See Socket_Option.Send_Timeout. Not_Socket = c.int(linux.Errno.ENOTSOCK), // The so-called socket is not an open socket. + Broken_Pipe = c.int(linux.Errno.EPIPE), // The peer has disconnected when we are trying to send to it } // TODO diff --git a/core/net/socket_linux.odin b/core/net/socket_linux.odin index ba48959fb..9c4342592 100644 --- a/core/net/socket_linux.odin +++ b/core/net/socket_linux.odin @@ -258,7 +258,7 @@ _send_tcp :: proc(tcp_sock: TCP_Socket, buf: []byte) -> (int, Network_Error) { for total_written < len(buf) { limit := min(int(max(i32)), len(buf) - total_written) remaining := buf[total_written:][:limit] - res, errno := linux.send(linux.Fd(tcp_sock), remaining, {}) + res, errno := linux.send(linux.Fd(tcp_sock), remaining, {.NOSIGNAL}) if errno != .NONE { return total_written, TCP_Send_Error(errno) } From 7b95562827290258c49e27c7ee8d7be53b7239fe Mon Sep 17 00:00:00 2001 From: Rickard Andersson Date: Fri, 19 Apr 2024 15:29:28 +0300 Subject: [PATCH 58/66] feat(net): turn `EPIPE` into `Connection_Closed` --- core/net/errors_linux.odin | 1 - core/net/socket_linux.odin | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/core/net/errors_linux.odin b/core/net/errors_linux.odin index 5e2c52aea..2370dd0d8 100644 --- a/core/net/errors_linux.odin +++ b/core/net/errors_linux.odin @@ -136,7 +136,6 @@ TCP_Send_Error :: enum c.int { Interrupted = c.int(linux.Errno.EINTR), // A signal occurred before any 
data was transmitted. See signal(7). Timeout = c.int(linux.Errno.EWOULDBLOCK), // The send timeout duration passed before all data was sent. See Socket_Option.Send_Timeout. Not_Socket = c.int(linux.Errno.ENOTSOCK), // The so-called socket is not an open socket. - Broken_Pipe = c.int(linux.Errno.EPIPE), // The peer has disconnected when we are trying to send to it } // TODO diff --git a/core/net/socket_linux.odin b/core/net/socket_linux.odin index 9c4342592..d9b29fb3a 100644 --- a/core/net/socket_linux.odin +++ b/core/net/socket_linux.odin @@ -259,7 +259,9 @@ _send_tcp :: proc(tcp_sock: TCP_Socket, buf: []byte) -> (int, Network_Error) { limit := min(int(max(i32)), len(buf) - total_written) remaining := buf[total_written:][:limit] res, errno := linux.send(linux.Fd(tcp_sock), remaining, {.NOSIGNAL}) - if errno != .NONE { + if errno == .EPIPE { + return total_written, TCP_Send_Error.Connection_Closed + } else if errno != .NONE { return total_written, TCP_Send_Error(errno) } total_written += int(res) From 059175de3bfab925085808989aadd909932b5c1d Mon Sep 17 00:00:00 2001 From: gingerBill Date: Fri, 19 Apr 2024 13:32:55 +0100 Subject: [PATCH 59/66] Do not print column of a runtime.Source_Code_Location if the `column == 0` --- base/runtime/print.odin | 12 ++++++++---- core/fmt/fmt.odin | 12 ++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/base/runtime/print.odin b/base/runtime/print.odin index 4e2ffaf80..ed5893e15 100644 --- a/base/runtime/print.odin +++ b/base/runtime/print.odin @@ -228,14 +228,18 @@ print_caller_location :: #force_no_inline proc "contextless" (loc: Source_Code_L when ODIN_ERROR_POS_STYLE == .Default { print_byte('(') print_u64(u64(loc.line)) - print_byte(':') - print_u64(u64(loc.column)) + if loc.column != 0 { + print_byte(':') + print_u64(u64(loc.column)) + } print_byte(')') } else when ODIN_ERROR_POS_STYLE == .Unix { print_byte(':') print_u64(u64(loc.line)) - print_byte(':') - print_u64(u64(loc.column)) + if loc.column != 0 { 
+ print_byte(':') + print_u64(u64(loc.column)) + } print_byte(':') } else { #panic("unhandled ODIN_ERROR_POS_STYLE") diff --git a/core/fmt/fmt.odin b/core/fmt/fmt.odin index d3b9d7d69..547d59ce0 100644 --- a/core/fmt/fmt.odin +++ b/core/fmt/fmt.odin @@ -2156,14 +2156,18 @@ fmt_named :: proc(fi: ^Info, v: any, verb: rune, info: runtime.Type_Info_Named) when ODIN_ERROR_POS_STYLE == .Default { io.write_byte(fi.writer, '(', &fi.n) io.write_int(fi.writer, int(a.line), 10, &fi.n) - io.write_byte(fi.writer, ':', &fi.n) - io.write_int(fi.writer, int(a.column), 10, &fi.n) + if a.column != 0 { + io.write_byte(fi.writer, ':', &fi.n) + io.write_int(fi.writer, int(a.column), 10, &fi.n) + } io.write_byte(fi.writer, ')', &fi.n) } else when ODIN_ERROR_POS_STYLE == .Unix { io.write_byte(fi.writer, ':', &fi.n) io.write_int(fi.writer, int(a.line), 10, &fi.n) - io.write_byte(fi.writer, ':', &fi.n) - io.write_int(fi.writer, int(a.column), 10, &fi.n) + if a.column != 0 { + io.write_byte(fi.writer, ':', &fi.n) + io.write_int(fi.writer, int(a.column), 10, &fi.n) + } io.write_byte(fi.writer, ':', &fi.n) } else { #panic("Unhandled ODIN_ERROR_POS_STYLE") From efc84cd390e6773ee71e35bc851ce4f55f39c34a Mon Sep 17 00:00:00 2001 From: Rickard Andersson Date: Fri, 19 Apr 2024 15:37:20 +0300 Subject: [PATCH 60/66] docs(net): add comment about `EPIPE` -> `Connection_Closed` --- core/net/socket_linux.odin | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/net/socket_linux.odin b/core/net/socket_linux.odin index d9b29fb3a..ee3a41927 100644 --- a/core/net/socket_linux.odin +++ b/core/net/socket_linux.odin @@ -260,7 +260,9 @@ _send_tcp :: proc(tcp_sock: TCP_Socket, buf: []byte) -> (int, Network_Error) { remaining := buf[total_written:][:limit] res, errno := linux.send(linux.Fd(tcp_sock), remaining, {.NOSIGNAL}) if errno == .EPIPE { - return total_written, TCP_Send_Error.Connection_Closed + // If the peer is disconnected when we are trying to send we will get an `EPIPE` error, + // 
so we turn that into a clearer error + return total_written, .Connection_Closed } else if errno != .NONE { return total_written, TCP_Send_Error(errno) } From 68f663ea8585f0de6ca7d34ecf93031603f22cb6 Mon Sep 17 00:00:00 2001 From: Rickard Andersson Date: Fri, 19 Apr 2024 15:39:04 +0300 Subject: [PATCH 61/66] fix(net): fix return type for `send_tcp` Was `.Connection_Closed` but this is only inferrable if our return type is not a sub-union of another. --- core/net/socket_linux.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/net/socket_linux.odin b/core/net/socket_linux.odin index ee3a41927..a4d75b92b 100644 --- a/core/net/socket_linux.odin +++ b/core/net/socket_linux.odin @@ -262,7 +262,7 @@ _send_tcp :: proc(tcp_sock: TCP_Socket, buf: []byte) -> (int, Network_Error) { if errno == .EPIPE { // If the peer is disconnected when we are trying to send we will get an `EPIPE` error, // so we turn that into a clearer error - return total_written, .Connection_Closed + return total_written, TCP_Send_Error.Connection_Closed } else if errno != .NONE { return total_written, TCP_Send_Error(errno) } From 0a16f7a6f1e3e40dfed7cb93725d325787bc948b Mon Sep 17 00:00:00 2001 From: Thomas la Cour Date: Tue, 26 Mar 2024 12:22:18 +0100 Subject: [PATCH 62/66] normalize_path --- src/build_settings.cpp | 6 ++---- src/string.cpp | 34 ++++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/build_settings.cpp b/src/build_settings.cpp index b806adcd6..03a95a19b 100644 --- a/src/build_settings.cpp +++ b/src/build_settings.cpp @@ -840,13 +840,11 @@ gb_internal String odin_root_dir(void) { char const *found = gb_get_env("ODIN_ROOT", a); if (found) { String path = path_to_full_path(a, make_string_c(found)); - if (path[path.len-1] != '/' && path[path.len-1] != '\\') { #if defined(GB_SYSTEM_WINDOWS) - path = concatenate_strings(a, path, WIN32_SEPARATOR_STRING); + path = normalize_path(a, path, WIN32_SEPARATOR_STRING); #else - path 
= concatenate_strings(a, path, NIX_SEPARATOR_STRING); + path = normalize_path(a, path, NIX_SEPARATOR_STRING); #endif - } global_module_path = path; global_module_path_set = true; diff --git a/src/string.cpp b/src/string.cpp index 3747f4564..b92dd589e 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -237,11 +237,16 @@ gb_internal String string_split_iterator(String_Iterator *it, const char sep) { return substring(it->str, start, end); } +gb_internal gb_inline bool is_separator(u8 const &ch) { + return (ch == '/' || ch == '\\'); +} + + gb_internal gb_inline isize string_extension_position(String const &str) { isize dot_pos = -1; isize i = str.len; while (i --> 0) { - if (str[i] == '\\' || str[i] == '/') + if (is_separator(str[i])) break; if (str[i] == '.') { dot_pos = i; @@ -332,8 +337,7 @@ gb_internal String filename_from_path(String s) { if (i > 0) { isize j = 0; for (j = s.len-1; j >= 0; j--) { - if (s[j] == '/' || - s[j] == '\\') { + if (is_separator(s[j])) { break; } } @@ -346,8 +350,7 @@ gb_internal String filename_from_path(String s) { gb_internal String filename_without_directory(String s) { isize j = 0; for (j = s.len-1; j >= 0; j--) { - if (s[j] == '/' || - s[j] == '\\') { + if (is_separator(s[j])) { break; } } @@ -410,7 +413,26 @@ gb_internal String copy_string(gbAllocator a, String const &s) { return make_string(data, s.len); } - +gb_internal String normalize_path(gbAllocator a, String const &path, String const &sep) { + String s; + if (sep.len < 1) { + return path; + } + if (path.len < 1) { + s = STR_LIT(""); + } else if (is_separator(path[path.len-1])) { + s = copy_string(a, path); + } else { + s = concatenate_strings(a, path, sep); + } + isize i; + for (i = 0; i < s.len; i++) { + if (is_separator(s.text[i])) { + s.text[i] = sep.text[0]; + } + } + return s; +} #if defined(GB_SYSTEM_WINDOWS) From ebb1a07dd081bb9210e093ebac89f692cb8200d6 Mon Sep 17 00:00:00 2001 From: Thomas la Cour Date: Tue, 26 Mar 2024 12:22:41 +0100 Subject: [PATCH 63/66] spelling 
--- src/main.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 53103ce3a..4e8f64e05 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -337,12 +337,12 @@ struct BuildFlag { String name; BuildFlagParamKind param_kind; u32 command_support; - bool allow_mulitple; + bool allow_multiple; }; -gb_internal void add_flag(Array *build_flags, BuildFlagKind kind, String name, BuildFlagParamKind param_kind, u32 command_support, bool allow_mulitple=false) { - BuildFlag flag = {kind, name, param_kind, command_support, allow_mulitple}; +gb_internal void add_flag(Array *build_flags, BuildFlagKind kind, String name, BuildFlagParamKind param_kind, u32 command_support, bool allow_multiple=false) { + BuildFlag flag = {kind, name, param_kind, command_support, allow_multiple}; array_add(build_flags, flag); } @@ -1358,7 +1358,7 @@ gb_internal bool parse_build_flags(Array args) { } } - if (!bf.allow_mulitple) { + if (!bf.allow_multiple) { set_flags[bf.kind] = ok; } } From 2a70faca146b752a5009b7a7bb68c488461e40bb Mon Sep 17 00:00:00 2001 From: Damian Tarnawski Date: Sun, 21 Apr 2024 22:37:04 +0200 Subject: [PATCH 64/66] Add printfln and eprintfln functions to fmt_js.odin --- core/fmt/fmt_js.odin | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/fmt/fmt_js.odin b/core/fmt/fmt_js.odin index c70b7c1c0..a0a890a9a 100644 --- a/core/fmt/fmt_js.odin +++ b/core/fmt/fmt_js.odin @@ -37,6 +37,8 @@ print :: proc(args: ..any, sep := " ", flush := true) -> int { return wprint(w println :: proc(args: ..any, sep := " ", flush := true) -> int { return wprintln(w=stdout, args=args, sep=sep, flush=flush) } // printf formats according to the specififed format string and writes to stdout printf :: proc(fmt: string, args: ..any, flush := true) -> int { return wprintf(stdout, fmt, ..args, flush=flush) } +// printfln formats according to the specified format string and writes to stdout, followed by a newline. 
+printfln :: proc(fmt: string, args: ..any, flush := true) -> int { return wprintf(stdout, fmt, ..args, flush=flush, newline=true) } // eprint formats using the default print settings and writes to stderr eprint :: proc(args: ..any, sep := " ", flush := true) -> int { return wprint(w=stderr, args=args, sep=sep, flush=flush) } @@ -44,3 +46,5 @@ eprint :: proc(args: ..any, sep := " ", flush := true) -> int { return wprint( eprintln :: proc(args: ..any, sep := " ", flush := true) -> int { return wprintln(w=stderr, args=args, sep=sep, flush=flush) } // eprintf formats according to the specififed format string and writes to stderr eprintf :: proc(fmt: string, args: ..any, flush := true) -> int { return wprintf(stderr, fmt, ..args, flush=flush) } +// eprintfln formats according to the specified format string and writes to stderr, followed by a newline. +eprintfln :: proc(fmt: string, args: ..any, flush := true) -> int { return wprintf(stderr, fmt, ..args, flush=flush, newline=true) } From 90369b669b5d48f1912f5a3667fcea57a0c4cef2 Mon Sep 17 00:00:00 2001 From: Laytan Laats Date: Mon, 22 Apr 2024 19:05:50 +0200 Subject: [PATCH 65/66] fix direct proc args debug info --- src/llvm_backend_debug.cpp | 12 ++---------- src/llvm_backend_proc.cpp | 13 ++----------- 2 files changed, 4 insertions(+), 21 deletions(-) diff --git a/src/llvm_backend_debug.cpp b/src/llvm_backend_debug.cpp index 853941496..2654a1d28 100644 --- a/src/llvm_backend_debug.cpp +++ b/src/llvm_backend_debug.cpp @@ -1027,7 +1027,7 @@ gb_internal void lb_add_debug_local_variable(lbProcedure *p, LLVMValueRef ptr, T LLVMDIBuilderInsertDeclareAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block); } -gb_internal void lb_add_debug_param_variable(lbProcedure *p, LLVMValueRef ptr, Type *type, Token const &token, unsigned 
arg_number, lbBlock *block) { if (p->debug_info == nullptr) { return; } @@ -1088,15 +1088,7 @@ gb_internal void lb_add_debug_param_variable(lbProcedure *p, LLVMValueRef ptr, T // NOTE(bill, 2022-02-01): For parameter values, you must insert them at the end of the decl block // The reason is that if the parameter is at index 0 and a pointer, there is not such things as an // instruction "before" it. - switch (arg_kind) { - case lbArg_Direct: - LLVMDIBuilderInsertDbgValueAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block->block); - break; - case lbArg_Indirect: - LLVMDIBuilderInsertDeclareAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block->block); - break; - } - + LLVMDIBuilderInsertDeclareAtEnd(m->debug_builder, storage, var_info, llvm_expr, llvm_debug_loc, block->block); } diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index bb4aed3f1..f73698d34 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -597,16 +597,7 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { lbValue ptr = lb_address_from_load_or_generate_local(p, param); GB_ASSERT(LLVMIsAAllocaInst(ptr.value)); lb_add_entity(p->module, e, ptr); - - lbBlock *block = p->decl_block; - if (original_value != value) { - block = p->curr_block; - } - LLVMValueRef debug_storage_value = value; - if (original_value != value && LLVMIsALoadInst(value)) { - debug_storage_value = LLVMGetOperand(value, 0); - } - lb_add_debug_param_variable(p, debug_storage_value, e->type, e->token, param_index+1, block, arg_type->kind); + lb_add_debug_param_variable(p, ptr.value, e->type, e->token, param_index+1, p->curr_block); } } else if (arg_type->kind == lbArg_Indirect) { if (e->token.string.len != 0 && !is_blank_ident(e->token.string)) { @@ -614,7 +605,7 @@ gb_internal void lb_begin_procedure_body(lbProcedure *p) { ptr.value = LLVMGetParam(p->value, param_offset+param_index); ptr.type = alloc_type_pointer(e->type); 
lb_add_entity(p->module, e, ptr); - lb_add_debug_param_variable(p, ptr.value, e->type, e->token, param_index+1, p->decl_block, arg_type->kind); + lb_add_debug_param_variable(p, ptr.value, e->type, e->token, param_index+1, p->decl_block); } } } From c6a446fe87b818ef7d34131babe074080cdc4575 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Mon, 22 Apr 2024 18:41:48 +0100 Subject: [PATCH 66/66] Add check for `build.` and `run.` typos --- src/main.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 53103ce3a..8a1f4852e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -198,7 +198,12 @@ gb_internal void print_usage_line(i32 indent, char const *fmt, ...) { gb_printf("\n"); } -gb_internal void usage(String argv0) { +gb_internal void usage(String argv0, String argv1 = {}) { + if (argv1 == "run.") { + print_usage_line(0, "Did you mean 'odin run .'?"); + } else if (argv1 == "build.") { + print_usage_line(0, "Did you mean 'odin build .'?"); + } print_usage_line(0, "%.*s is a tool for managing Odin source code.", LIT(argv0)); print_usage_line(0, "Usage:"); print_usage_line(1, "%.*s command [arguments]", LIT(argv0)); @@ -2586,7 +2591,11 @@ int main(int arg_count, char const **arg_ptr) { gb_printf("%.*s", LIT(odin_root_dir())); return 0; } else { - usage(args[0]); + String argv1 = {}; + if (args.count > 1) { + argv1 = args[1]; + } + usage(args[0], argv1); return 1; }