Rename json.Specification.MJSON to SJSON (MJSON and Bitsquid are kept as aliases),
make strings.prefix_length ignore a trailing partial codepoint, and turn the utf8
decode_rune / decode_last_rune / rune_count / full_rune procedures into proc groups
over explicit *_in_string and *_in_bytes variants.

prefix_length: the second pass stops only on a real decode failure (width 0 or 1),
so a validly encoded U+FFFD inside the common prefix no longer truncates the result.

NOTE(review): line breaks, blank context lines and tab indentation were reconstructed
from a whitespace-collapsed copy of this patch; run `git apply --check` before use.
NOTE(review): the strings.odin `index` hash predates the prefix_length adjustment.

diff --git a/core/encoding/json/parser.odin b/core/encoding/json/parser.odin
index 7bf88c565..ed36ae33b 100644
--- a/core/encoding/json/parser.odin
+++ b/core/encoding/json/parser.odin
@@ -40,7 +40,7 @@ parse_string :: proc(data: string, spec := DEFAULT_SPECIFICATION, parse_integers
 		return parse_object(&p)
 	case .JSON5:
 		return parse_value(&p)
-	case .MJSON:
+	case .SJSON:
 		#partial switch p.curr_token.kind {
 		case .Ident, .String:
 			return parse_object_body(&p, .EOF)
diff --git a/core/encoding/json/types.odin b/core/encoding/json/types.odin
index 534d20311..468774aa9 100644
--- a/core/encoding/json/types.odin
+++ b/core/encoding/json/types.odin
@@ -33,8 +33,9 @@ package json
 Specification :: enum {
 	JSON,
 	JSON5, // https://json5.org/
-	MJSON, // https://bitsquid.blogspot.com/2009/10/simplified-json-notation.html
-	Bitsquid = MJSON,
+	SJSON, // https://bitsquid.blogspot.com/2009/10/simplified-json-notation.html
+	Bitsquid = SJSON,
+	MJSON = SJSON,
 }
 
 
diff --git a/core/strings/strings.odin b/core/strings/strings.odin
index 678cc94cd..6bdafbba4 100644
--- a/core/strings/strings.odin
+++ b/core/strings/strings.odin
@@ -225,14 +225,25 @@ equal_fold :: proc(u, v: string) -> bool {
 */
 prefix_length :: proc(a, b: string) -> (n: int) {
 	_len := min(len(a), len(b))
-	idx := 0
 
-	#no_bounds_check for idx < _len && a[idx] == b[idx] {
-		idx += 1
+	// Scan for matches including partial codepoints.
+	#no_bounds_check for n < _len && a[n] == b[n] {
+		n += 1
+	}
 
-		if a[idx] & 128 != 128 {
-			// new codepoint or end of multi-byte codepoint, update match length
-			n = idx
+	// Now scan to ignore partial codepoints.
+	if n > 0 {
+		s := a[:n]
+		n = 0
+		for {
+			r0, w := utf8.decode_rune(s[n:])
+			// Only width 0 (end of input) or width 1 (invalid encoding) is a decode failure;
+			// a validly encoded U+FFFD also yields RUNE_ERROR, but with w > 1, and must count.
+			if r0 != utf8.RUNE_ERROR || w > 1 {
+				n += w
+			} else {
+				break
+			}
 		}
 	}
 	return
diff --git a/core/unicode/utf8/utf8.odin b/core/unicode/utf8/utf8.odin
index 6a04b0fe9..a0da5c5d1 100644
--- a/core/unicode/utf8/utf8.odin
+++ b/core/unicode/utf8/utf8.odin
@@ -90,10 +90,15 @@ encode_rune :: proc(c: rune) -> ([4]u8, int) {
 	return buf, 4
 }
 
-decode_rune_in_string :: #force_inline proc(s: string) -> (rune, int) {
-	return decode_rune(transmute([]u8)s)
+
+decode_rune :: proc{
+	decode_rune_in_string,
+	decode_rune_in_bytes,
 }
-decode_rune :: proc(s: []u8) -> (rune, int) {
+decode_rune_in_string :: #force_inline proc(s: string) -> (rune, int) {
+	return decode_rune_in_bytes(transmute([]u8)s)
+}
+decode_rune_in_bytes :: proc(s: []u8) -> (rune, int) {
 	n := len(s)
 	if n < 1 {
 		return RUNE_ERROR, 0
@@ -161,10 +166,15 @@ runes_to_string :: proc(runes: []rune, allocator := context.allocator) -> string
 }
 
 
-decode_last_rune_in_string :: #force_inline proc(s: string) -> (rune, int) {
-	return decode_last_rune(transmute([]u8)s)
+decode_last_rune :: proc{
+	decode_last_rune_in_string,
+	decode_last_rune_in_bytes,
 }
-decode_last_rune :: proc(s: []u8) -> (rune, int) {
+
+decode_last_rune_in_string :: #force_inline proc(s: string) -> (rune, int) {
+	return decode_last_rune_in_bytes(transmute([]u8)s)
+}
+decode_last_rune_in_bytes :: proc(s: []u8) -> (rune, int) {
 	r: rune
 	size: int
 	start, end, limit: int
@@ -297,10 +307,15 @@ rune_start :: #force_inline proc(b: u8) -> bool {
 	return b&0xc0 != 0x80
 }
 
-rune_count_in_string :: #force_inline proc(s: string) -> int {
-	return rune_count(transmute([]u8)s)
+rune_count :: proc{
+	rune_count_in_string,
+	rune_count_in_bytes,
 }
-rune_count :: proc(s: []u8) -> int {
+
+rune_count_in_string :: #force_inline proc(s: string) -> int {
+	return rune_count_in_bytes(transmute([]u8)s)
+}
+rune_count_in_bytes :: proc(s: []u8) -> int {
 	count := 0
 	n := len(s)
 
@@ -353,7 +368,14 @@ rune_size :: proc(r: rune) -> int {
 
 // full_rune reports if the bytes in b begin with a full utf-8 encoding of a rune or not
 // An invalid encoding is considered a full rune since it will convert as an error rune of width 1 (RUNE_ERROR)
-full_rune :: proc(b: []byte) -> bool {
+full_rune :: proc{
+	full_rune_in_string,
+	full_rune_in_bytes,
+}
+
+// full_rune_in_bytes reports if the bytes in b begin with a full utf-8 encoding of a rune or not
+// An invalid encoding is considered a full rune since it will convert as an error rune of width 1 (RUNE_ERROR)
+full_rune_in_bytes :: proc(b: []byte) -> bool {
 	n := len(b)
 	if n == 0 {
 		return false
@@ -374,7 +396,7 @@ full_rune :: proc(b: []byte) -> bool {
 // full_rune_in_string reports if the bytes in s begin with a full utf-8 encoding of a rune or not
 // An invalid encoding is considered a full rune since it will convert as an error rune of width 1 (RUNE_ERROR)
 full_rune_in_string :: proc(s: string) -> bool {
-	return full_rune(transmute([]byte)s)
+	return full_rune_in_bytes(transmute([]byte)s)
 }
 
 