package json; JSON5 support

This commit is contained in:
gingerBill
2019-01-06 21:48:13 +00:00
parent d732a51587
commit d1b9f3ac74
4 changed files with 319 additions and 48 deletions
+118 -21
View File
@@ -7,20 +7,27 @@ import "core:strconv"
// Parser bundles the state needed to parse one input string: the tokenizer,
// the token currently under inspection, the grammar being accepted and the
// allocator handed to the string/object allocations made while parsing.
Parser :: struct {
tok: Tokenizer, // token source, created by make_tokenizer
curr_token: Token, // token currently being examined by the parse_* procedures
spec: Specification, // grammar in effect (JSON or JSON5)
allocator: mem.Allocator, // allocator passed to unquote_string/clone_string and stored on objects
}
// make_parser builds a Parser over `data`.
//
// It creates the tokenizer, records the chosen specification and allocator,
// asserts that the allocator has a procedure set, and then calls
// advance_token once before returning the parser by value.
//
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped, so the signature and the make_tokenizer call are each listed twice
// (the form without `spec` followed by the form with `spec`) -- confirm which
// lines are live before editing.
make_parser :: proc(data: string, allocator := context.allocator) -> Parser {
make_parser :: proc(data: string, spec := Specification.JSON, allocator := context.allocator) -> Parser {
p: Parser;
p.tok = make_tokenizer(data);
p.tok = make_tokenizer(data, spec);
p.spec = spec;
p.allocator = allocator;
assert(p.allocator.procedure != nil); // an allocator without a procedure is rejected up front
advance_token(&p); // presumably loads the first token into curr_token -- confirm in advance_token
return p;
}
// parse parses `data` and returns the resulting Value together with an Error.
//
// With Specification.JSON5 the top level is parsed with parse_value; otherwise
// the top level is parsed with parse_object.
//
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped, so the signature and the make_parser call are each listed twice
// (the form without `spec` followed by the form with `spec`) -- confirm which
// lines are live before editing.
parse :: proc(data: string, allocator := context.allocator) -> (Value, Error) {
p := make_parser(data, allocator);
parse :: proc(data: string, spec := Specification.JSON, allocator := context.allocator) -> (Value, Error) {
context.allocator = allocator; // make `allocator` the context allocator for the rest of this procedure
p := make_parser(data, spec, allocator);
if p.spec == Specification.JSON5 {
return parse_value(&p);
}
return parse_object(&p);
}
@@ -77,7 +84,7 @@ parse_value :: proc(p: ^Parser) -> (value: Value, err: Error) {
advance_token(p);
return;
case Kind.String:
value.value = String(unquote_string(token, p.allocator));
value.value = String(unquote_string(token, p.spec, p.allocator));
advance_token(p);
return;
@@ -132,6 +139,34 @@ parse_array :: proc(p: ^Parser) -> (value: Value, err: Error) {
return;
}
// clone_string returns a copy of `s` whose bytes are allocated with `allocator`.
//
// The backing buffer is one byte longer than `s` and that extra byte is set
// to 0, so the returned string (which has the same length as `s`) is always
// followed in memory by a NUL byte.
clone_string :: proc(s: string, allocator: mem.Allocator) -> string {
	length := len(s);

	// length+1 bytes: the payload plus the trailing NUL.
	buf := make([]byte, length+1, allocator);
	buf[length] = 0;
	copy(buf, cast([]byte)s);

	// The NUL is deliberately left outside the returned slice.
	return string(buf[:length]);
}
// parse_object_key consumes the current token as an object key and returns
// the key as a string allocated with the parser's allocator.
//
// Under Specification.JSON5 a key may be a string token (unquoted via
// unquote_string) or an identifier token (copied via clone_string).
// In every other case the current token must be a string; if it is not,
// err is set to Error.Expected_String_For_Object_Key and key is left empty.
parse_object_key :: proc(p: ^Parser) -> (key: string, err: Error) {
	key_token := p.curr_token;
	is_json5 := p.spec == Specification.JSON5;

	// JSON5: quoted key.
	if is_json5 && key_token.kind == Kind.String {
		expect_token(p, Kind.String);
		key = unquote_string(key_token, p.spec, p.allocator);
		return;
	}

	// JSON5: bare identifier key.
	if is_json5 && key_token.kind == Kind.Ident {
		expect_token(p, Kind.Ident);
		key = clone_string(key_token.text, p.allocator);
		return;
	}

	// Everything else must be a string token.
	if expect_token(p, Kind.String) != Error.None {
		err = Error.Expected_String_For_Object_Key;
		return;
	}
	key = unquote_string(key_token, p.spec, p.allocator);
	return;
}
parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
value.pos = p.curr_token.pos;
@@ -144,20 +179,20 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
obj.allocator = p.allocator;
defer if err != Error.None {
for key, elem in obj {
delete(key);
delete(key, p.allocator);
destroy_value(elem);
}
delete(obj);
}
for p.curr_token.kind != Kind.Close_Brace {
tok := p.curr_token;
if tok_err := expect_token(p, Kind.String); tok_err != Error.None {
err = Error.Expected_String_For_Object_Key;
key: string;
key, err = parse_object_key(p);
if err != Error.None {
delete(key, p.allocator);
value.pos = p.curr_token.pos;
return;
}
key := unquote_string(tok, p.allocator);
if colon_err := expect_token(p, Kind.Colon); colon_err != Error.None {
err = Error.Expected_Colon_After_Key;
@@ -175,17 +210,24 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
if key in obj {
err = Error.Duplicate_Object_Key;
value.pos = p.curr_token.pos;
delete(key);
delete(key, p.allocator);
return;
}
obj[key] = elem;
// Disallow trailing commas for the time being
if allow_token(p, Kind.Comma) {
continue;
if p.spec == Specification.JSON5 {
// Allow trailing commas
if allow_token(p, Kind.Comma) {
continue;
}
} else {
break;
// Disallow trailing commas
if allow_token(p, Kind.Comma) {
continue;
} else {
break;
}
}
}
@@ -200,7 +242,25 @@ parse_object :: proc(p: ^Parser) -> (value: Value, err: Error) {
// IMPORTANT NOTE(bill): unquote_string assumes a mostly valid string
unquote_string :: proc(token: Token, allocator := context.allocator) -> string {
unquote_string :: proc(token: Token, spec: Specification, allocator := context.allocator) -> string {
// get_u2_rune decodes a two-digit hexadecimal escape of the form `\xHH`
// found at the start of `s` and returns its value as a rune.
// Returns -1 when `s` is shorter than four bytes, does not begin with `\x`,
// or either of the two following characters is not a hexadecimal digit.
get_u2_rune :: proc(s: string) -> rune {
	if len(s) < 4 {
		return -1;
	}
	if s[0] != '\\' || s[1] != 'x' {
		return -1;
	}

	value: rune;
	for digit in s[2:4] {
		nibble: rune;
		switch digit {
		case '0'..'9': nibble = digit - '0';
		case 'a'..'f': nibble = digit - 'a' + 10;
		case 'A'..'F': nibble = digit - 'A' + 10;
		case:
			// Not a hexadecimal digit.
			return -1;
		}
		value = value*16 + nibble;
	}
	return value;
}
get_u4_rune :: proc(s: string) -> rune {
if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
return -1;
@@ -227,12 +287,17 @@ unquote_string :: proc(token: Token, allocator := context.allocator) -> string {
if len(s) <= 2 {
return "";
}
quote := s[0];
if s[0] != s[len(s)-1] {
// Invalid string
return "";
}
s = s[1:len(s)-1];
i := 0;
for i < len(s) {
c := s[i];
if c == '\\' || c == '"' || c < ' ' {
if c == '\\' || c == quote || c < ' ' {
break;
}
if c < utf8.RUNE_SELF {
@@ -246,9 +311,7 @@ unquote_string :: proc(token: Token, allocator := context.allocator) -> string {
i += w;
}
if i == len(s) {
b := make([]byte, len(s), allocator);
copy(b, cast([]byte)s);
return string(b);
return clone_string(s, allocator);
}
b := make([]byte, len(s) + 2*utf8.UTF_MAX, allocator);
@@ -299,9 +362,43 @@ unquote_string :: proc(token: Token, allocator := context.allocator) -> string {
buf, buf_width := utf8.encode_rune(r);
copy(b[w:], buf[:buf_width]);
w += buf_width;
case '0':
if spec == Specification.JSON5 {
b[w] = '\x00';
i += 1;
w += 1;
} else {
break loop;
}
case 'v':
if spec == Specification.JSON5 {
b[w] = '\v';
i += 1;
w += 1;
} else {
break loop;
}
case 'x':
if spec == Specification.JSON5 {
i -= 1; // Include the \x in the check for sanity sake
r := get_u2_rune(s[i:]);
if r < 0 {
break loop;
}
i += 4;
buf, buf_width := utf8.encode_rune(r);
copy(b[w:], buf[:buf_width]);
w += buf_width;
} else {
break loop;
}
}
case c == '"', c < ' ':
case c == quote, c < ' ':
break loop;
case c < utf8.RUNE_SELF:
+164 -18
View File
@@ -15,6 +15,9 @@ Kind :: enum {
False,
True,
Infinity,
NaN,
Ident,
Integer,
@@ -37,13 +40,17 @@ Tokenizer :: struct {
r: rune, // current rune
w: int, // current rune width in bytes
curr_line_offset: int,
spec: Specification,
}
// make_tokenizer creates a Tokenizer over `data`, starting at line 1, and
// reads the first rune. If that rune is a byte order mark it is skipped by
// reading one more rune.
//
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped, so the signature and the Tokenizer literal are each listed twice
// (the form without `spec` followed by the form with `spec`) -- confirm which
// lines are live before editing.
make_tokenizer :: proc(data: string) -> Tokenizer {
t := Tokenizer{pos = {line=1}, data = data};
make_tokenizer :: proc(data: string, spec := Specification.JSON) -> Tokenizer {
t := Tokenizer{pos = {line=1}, data = data, spec = spec};
next_rune(&t); // load the first rune into t.r
if t.r == utf8.RUNE_BOM {
next_rune(&t); // skip a leading byte order mark
}
return t;
}
@@ -69,6 +76,17 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
}
}
}
// skip_hex_digits advances the tokenizer while the current rune `t.r` is a
// hexadecimal digit (0-9, a-f, A-F) and stops on the first rune that is not.
//
// The current rune is tested BEFORE the tokenizer advances. The call site has
// already stepped past the `x`/`X` of the `0x` prefix, so `t.r` is the first
// candidate digit on entry; advancing first would consume that rune without
// ever checking it (e.g. the `}` in `0x}` would be swallowed into the number).
skip_hex_digits :: proc(t: ^Tokenizer) {
	for t.offset < len(t.data) {
		switch t.r {
		case '0'..'9', 'a'..'f', 'A'..'F':
			// Valid digit: consume it and look at the next rune.
			next_rune(t);
		case:
			return;
		}
	}
}
scan_espace :: proc(t: ^Tokenizer) -> bool {
switch t.r {
@@ -104,12 +122,39 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
t.pos.column = 1;
next_rune(t);
case:
if t.spec == Specification.JSON5 {
switch t.r {
case 0x2028, 0x2029, 0xFEFF:
next_rune(t);
continue loop;
}
}
break loop;
}
}
return t.r;
}
// skip_to_next_line advances the tokenizer rune by rune until the rune just
// read is a newline ('\n') or the end of the data is reached.
// The newline itself is left as the current rune.
skip_to_next_line :: proc(t: ^Tokenizer) {
	for t.offset < len(t.data) {
		if next_rune(t) == '\n' {
			break;
		}
	}
}
// skip_alphanum advances the tokenizer while the current rune `t.r` is an
// identifier character (ASCII letter, ASCII digit or '_') and stops on the
// first rune that is not.
//
// The current rune is tested BEFORE the tokenizer advances. get_token has
// already consumed the first rune of the identifier, so `t.r` is the second
// rune on entry; advancing first would consume it unchecked, which makes a
// one-letter identifier such as the key in `{a:1}` swallow the following ':'.
skip_alphanum :: proc(t: ^Tokenizer) {
	for t.offset < len(t.data) {
		switch t.r {
		case 'A'..'Z', 'a'..'z', '0'..'9', '_':
			// Still inside the identifier: consume and keep going.
			next_rune(t);
			continue;
		}
		return;
	}
}
skip_whitespace(t);
token.pos = t.pos;
@@ -118,7 +163,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
curr_rune := t.r;
next_rune(t);
switch curr_rune {
block: switch curr_rune {
case utf8.RUNE_ERROR:
err = Error.Illegal_Character;
case utf8.RUNE_EOF, '\x00':
@@ -127,21 +172,26 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case 'A'..'Z', 'a'..'z', '_':
token.kind = Kind.Ident;
for t.offset < len(t.data) {
switch next_rune(t) {
case 'A'..'Z', 'a'..'z', '0'..'9', '_':
continue;
}
break;
}
skip_alphanum(t);
switch str := t.data[token.offset:t.offset]; str {
case "null": token.kind = Kind.Null;
case "false": token.kind = Kind.False;
case "true": token.kind = Kind.True;
case:
if t.spec == Specification.JSON5 do switch str {
case "Infinity": token.kind = Kind.Infinity;
case "NaN": token.kind = Kind.NaN;
}
}
case '+':
err = Error.Illegal_Character;
if t.spec != Specification.JSON5 {
break;
}
fallthrough;
case '-':
switch t.r {
case '0'..'9':
@@ -149,12 +199,46 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case:
// Illegal use of +/-
err = Error.Illegal_Character;
break;
if t.spec == Specification.JSON5 {
if t.r == 'I' || t.r == 'N' {
skip_alphanum(t);
}
switch t.data[token.offset:t.offset] {
case "-Infinity": token.kind = Kind.Infinity;
case "-NaN": token.kind = Kind.NaN;
}
}
break block;
}
fallthrough;
case '.':
err = Error.Illegal_Character;
if t.spec == Specification.JSON5 { // Allow leading decimal point
skip_digits(t);
if t.r == 'e' || t.r == 'E' {
switch r := next_rune(t); r {
case '+', '-':
next_rune(t);
}
skip_digits(t);
}
str := t.data[token.offset:t.offset];
if !is_valid_number(str, t.spec) {
err = Error.Invalid_Number;
}
}
case '0'..'9':
token.kind = Kind.Integer;
if t.spec == Specification.JSON5 { // Hexadecimal Numbers
if curr_rune == '0' && (t.r == 'x' || t.r == 'X') {
next_rune(t);
skip_hex_digits(t);
break;
}
}
skip_digits(t);
if t.r == '.' {
@@ -171,11 +255,17 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
}
str := t.data[token.offset:t.offset];
if !is_valid_number(str) {
if !is_valid_number(str, t.spec) {
err = Error.Invalid_Number;
}
case '\'':
err = Error.Illegal_Character;
if t.spec != Specification.JSON5 {
break;
}
fallthrough;
case '"':
token.kind = Kind.String;
quote := curr_rune;
@@ -194,10 +284,11 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
}
}
if !is_valid_string_literal(t.data[token.offset : t.offset]) {
if !is_valid_string_literal(t.data[token.offset : t.offset], t.spec) {
err = Error.Invalid_String;
}
case ',': token.kind = Kind.Comma;
case ':': token.kind = Kind.Colon;
case '{': token.kind = Kind.Open_Brace;
@@ -205,6 +296,30 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
case '[': token.kind = Kind.Open_Bracket;
case ']': token.kind = Kind.Close_Bracket;
case '/':
err = Error.Illegal_Character;
if t.spec == Specification.JSON5 {
switch t.r {
case '/':
// Single-line comments
skip_to_next_line(t);
return get_token(t);
case '*':
// Non-nested multi-line comments
for t.offset < len(t.data) {
next_rune(t);
if t.r == '*' {
next_rune(t);
if t.r == '/' {
next_rune(t);
return get_token(t);
}
}
}
err = Error.EOF;
}
}
case: err = Error.Illegal_Character;
}
@@ -215,7 +330,7 @@ get_token :: proc(t: ^Tokenizer) -> (token: Token, err: Error) {
is_valid_number :: proc(s: string) -> bool {
is_valid_number :: proc(s: string, spec: Specification) -> bool {
if s == "" {
return false;
}
@@ -225,6 +340,13 @@ is_valid_number :: proc(s: string) -> bool {
if s == "" {
return false;
}
} else if spec == Specification.JSON5 {
if s[0] == '+' { // Allow positive sign
s = s[1:];
if s == "" {
return false;
}
}
}
switch s[0] {
@@ -233,10 +355,21 @@ is_valid_number :: proc(s: string) -> bool {
case '1'..'9':
s = s[1:];
for len(s) > 0 && '0' <= s[0] && s[0] <= '9' do s = s[1:];
case '.':
if spec == Specification.JSON5 { // Allow leading decimal point
s = s[1:];
} else {
return false;
}
case:
return false;
}
if spec == Specification.JSON5 {
if len(s) == 1 && s[0] == '.' { // Allow trailing decimal point
return true;
}
}
if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' {
s = s[2:];
@@ -259,10 +392,23 @@ is_valid_number :: proc(s: string) -> bool {
return s == "";
}
is_valid_string_literal :: proc(s: string) -> bool {
if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
is_valid_string_literal :: proc(s: string, spec: Specification) -> bool {
if len(s) < 2 {
return false;
}
quote := s[0];
if s[0] != s[len(s)-1] {
return false;
}
if s[0] != '"' || s[len(s)-1] != '"' {
if spec == Specification.JSON5 {
if s[0] != '\'' || s[len(s)-1] != '\'' {
return false;
}
} else {
return false;
}
}
s = s[1 : len(s)-1];
i := 0;
@@ -301,7 +447,7 @@ is_valid_string_literal :: proc(s: string) -> bool {
case: return false;
}
case c == '"', c < ' ':
case c == quote, c < ' ':
return false;
case c < utf8.RUNE_SELF:
+5
View File
@@ -2,6 +2,11 @@ package json
import "core:strconv"
// Specification selects which grammar the tokenizer, parser and validator
// accept. It is threaded through make_tokenizer, make_parser, parse and
// is_valid, all of which default to JSON.
Specification :: enum {
JSON, // default; the JSON5-only branches are not taken
JSON5, // enables the branches guarded by `spec == Specification.JSON5`
}
// NOTE(review): presumably these are the payload types stored in a Value for
// null and numeric JSON values -- confirm against the Value definition.
Null :: distinct rawptr; // distinct pointer type, so it is not interchangeable with a plain rawptr
Integer :: i64; // integers are 64-bit signed
Float :: f64; // floats are 64-bit
+32 -9
View File
@@ -3,19 +3,35 @@ package json
import "core:mem"
// is_valid reports whether `data` is well-formed under `spec` without
// building a value: the parser is created with mem.nil_allocator().
// With Specification.JSON5 the top level is checked with validate_value;
// otherwise the top level is checked with validate_object.
//
// NOTE(bill): is_valid will not check for duplicate keys
//
// NOTE(review): this text appears to be a rendered diff with the +/- markers
// stripped, so the signature and the make_parser call are each listed twice
// (the form without `spec` followed by the form with `spec`) -- confirm which
// lines are live before editing.
is_valid :: proc(data: string) -> bool {
p := make_parser(data, mem.nil_allocator());
is_valid :: proc(data: string, spec := Specification.JSON) -> bool {
p := make_parser(data, spec, mem.nil_allocator());
if p.spec == Specification.JSON5 {
return validate_value(&p);
}
return validate_object(&p);
}
// validate_object_key consumes the current token and reports whether it is
// acceptable as an object key.
//
// Under Specification.JSON5 both string and identifier tokens are accepted.
// In every other case the token must be a string, as judged by expect_token.
validate_object_key :: proc(p: ^Parser) -> bool {
	key_kind := p.curr_token.kind;

	// JSON5 keys may be quoted strings or bare identifiers.
	if p.spec == Specification.JSON5 && (key_kind == Kind.String || key_kind == Kind.Ident) {
		expect_token(p, key_kind);
		return true;
	}

	return expect_token(p, Kind.String) == Error.None;
}
validate_object :: proc(p: ^Parser) -> bool {
if err := expect_token(p, Kind.Open_Brace); err != Error.None {
return false;
}
for p.curr_token.kind != Kind.Close_Brace {
tok := p.curr_token;
if tok_err := expect_token(p, Kind.String); tok_err != Error.None {
if !validate_object_key(p) {
return false;
}
if colon_err := expect_token(p, Kind.Colon); colon_err != Error.None {
@@ -26,11 +42,18 @@ validate_object :: proc(p: ^Parser) -> bool {
return false;
}
// Disallow trailing commas for the time being
if allow_token(p, Kind.Comma) {
continue;
if p.spec == Specification.JSON5 {
// Allow trailing commas
if allow_token(p, Kind.Comma) {
continue;
}
} else {
break;
// Disallow trailing commas
if allow_token(p, Kind.Comma) {
continue;
} else {
break;
}
}
}
@@ -85,7 +108,7 @@ validate_value :: proc(p: ^Parser) -> bool {
return true;
case Kind.String:
advance_token(p);
return is_valid_string_literal(token.text);
return is_valid_string_literal(token.text, p.spec);
case Kind.Open_Brace:
return validate_object(p);