diff --git a/core/encoding/cel/cel.odin b/core/encoding/cel/cel.odin deleted file mode 100644 index 94f9281b3..000000000 --- a/core/encoding/cel/cel.odin +++ /dev/null @@ -1,852 +0,0 @@ -package cel; - -import "core:fmt" -import "core:strconv" -import "core:unicode/utf8" -import "core:strings" - -Array :: []Value; -Dict :: map[string]Value; -Nil_Value :: struct{}; - -Value :: union { - Nil_Value, - bool, i64, f64, string, - Array, Dict, -} - -Parser :: struct { - tokens: [dynamic]Token, - prev_token: Token, - curr_token: Token, - curr_token_index: int, - - allocated_strings: [dynamic]string, - - error_count: int, - - root: Dict, - dict_stack: [dynamic]^Dict, // NOTE: Pointers may be stored on the stack -} - - -print_value :: proc(value: Value, pretty := true, indent := 0) { - print_indent :: proc(indent: int) { - for _ in 0.. 0 { fmt.print(", "); } - print_value(e); - } - } - if pretty { print_indent(indent); } - fmt.print("]"); - case Dict: - fmt.print("{"); - if pretty { fmt.println(); } - - i := 0; - for name, val in v { - if pretty { - print_indent(indent+1); - fmt.printf("%s = ", name); - print_value(val, pretty, indent+1); - fmt.println(","); - } else { - if i > 0 { fmt.print(", "); } - fmt.printf("%s = ", name); - print_value(val, pretty, indent+1); - i += 1; - } - } - - if pretty { print_indent(indent); } - fmt.print("}"); - case: - fmt.print("nil"); - case Nil_Value: - fmt.print("nil"); - } -} -print :: proc(p: ^Parser, pretty := false) { - for name, val in p.root { - fmt.printf("%s = ", name); - print_value(val, pretty); - fmt.println(";"); - } -} - -create_from_string :: proc(src: string) -> (^Parser, bool) { - return init(transmute([]byte)src); -} - - -init :: proc(src: []byte) -> (^Parser, bool) { - t: Tokenizer; - tokenizer_init(&t, src); - return create_from_tokenizer(&t); -} - - -create_from_tokenizer :: proc(t: ^Tokenizer) -> (^Parser, bool) { - p := new(Parser); - for { - tok := scan(t); - if tok.kind == .Illegal { - return p, false; - } - append(&p.tokens, tok); - if tok.kind == .EOF { - break; - } - } - - if t.error_count > 0 { - return p, false; - } - - if len(p.tokens) == 0 { - tok := Token{kind = .EOF}; - tok.line, tok.column = 1, 1; - append(&p.tokens, tok); - return p, true; - } - - p.curr_token_index = 0; - p.prev_token = p.tokens[p.curr_token_index]; - p.curr_token = p.tokens[p.curr_token_index]; - - p.root = Dict{}; - p.dict_stack = make([dynamic]^Dict, 0, 4); - append(&p.dict_stack, &p.root); - - for p.curr_token.kind != .EOF && - p.curr_token.kind != .Illegal && - p.curr_token_index < len(p.tokens) { - if !parse_assignment(p) { - break; - } - } - - return p, true; -} - -destroy :: proc(p: ^Parser) { - destroy_value :: proc(value: Value) { - #partial switch v in value { - case Array: - for elem in v { - destroy_value(elem); - } - delete(v); - - case Dict: - for _, dv in v { - destroy_value(dv); - } - delete(v); - } - } - - delete(p.tokens); - for s in p.allocated_strings { - delete(s); - } - delete(p.allocated_strings); - delete(p.dict_stack); - - destroy_value(p.root); - free(p); -} - -error :: proc(p: ^Parser, pos: Pos, msg: string, args: ..any) { - fmt.eprintf("%s(%d:%d) Error: ", pos.file, pos.line, pos.column); - fmt.eprintf(msg, ..args); - fmt.eprintln(); - - p.error_count += 1; -} - -next_token :: proc(p: ^Parser) -> Token { - p.prev_token = p.curr_token; - prev := p.prev_token; - - if p.curr_token_index+1 < len(p.tokens) { - p.curr_token_index += 1; - p.curr_token = p.tokens[p.curr_token_index]; - return prev; - } - p.curr_token_index = len(p.tokens); - p.curr_token = p.tokens[p.curr_token_index-1]; - error(p, prev.pos, "Token is EOF"); - return prev; -} - -unquote_char :: proc(str: string, quote: byte) -> (r: rune, multiple_bytes: bool, tail_string: string, success: bool) { - hex_to_int :: proc(c: byte) -> int { - switch c { - case '0'..='9': return int(c-'0'); - case 'a'..='f': return int(c-'a')+10; - case 'A'..='F': return int(c-'A')+10; - } - return -1; - } - w: int; - - if str[0] == quote && quote == '"' { - return; - } else if str[0] >= 0x80 { - r, w = utf8.decode_rune_in_string(str); - return r, true, str[w:], true; - } else if str[0] != '\\' { - return rune(str[0]), false, str[1:], true; - } - - if len(str) <= 1 { - return; - } - s := str; - c := s[1]; - s = s[2:]; - - switch c { - case: - return; - - case 'a': r = '\a'; - case 'b': r = '\b'; - case 'f': r = '\f'; - case 'n': r = '\n'; - case 'r': r = '\r'; - case 't': r = '\t'; - case 'v': r = '\v'; - case '\\': r = '\\'; - - case '"': r = '"'; - case '\'': r = '\''; - - case '0'..='7': - v := int(c-'0'); - if len(s) < 2 { - return; - } - for i in 0.. 7 { - return; - } - v = (v<<3) | d; - } - s = s[2:]; - if v > 0xff { - return; - } - r = rune(v); - - case 'x', 'u', 'U': - count: int; - switch c { - case 'x': count = 2; - case 'u': count = 4; - case 'U': count = 8; - } - - if len(s) < count { - return; - } - - for i in 0.. utf8.MAX_RUNE { - return; - } - multiple_bytes = true; - } - - success = true; - tail_string = s; - return; -} - - -unquote_string :: proc(p: ^Parser, t: Token) -> (string, bool) { - if t.kind != .String { - return t.lit, true; - } - s := t.lit; - quote := '"'; - - if s == `""` { - return "", true; - } - - if strings.contains_rune(s, '\n') >= 0 { - return s, false; - } - - if strings.contains_rune(s, '\\') < 0 && strings.contains_rune(s, quote) < 0 { - if quote == '"' { - return s, true; - } - } - - - buf_len := 3*len(s) / 2; - buf := make([]byte, buf_len); - offset := 0; - for len(s) > 0 { - r, multiple_bytes, tail_string, ok := unquote_char(s, byte(quote)); - if !ok { - delete(buf); - return s, false; - } - s = tail_string; - if r < 0x80 || !multiple_bytes { - buf[offset] = byte(r); - offset += 1; - } else { - b, w := utf8.encode_rune(r); - copy(buf[offset:], b[:w]); - offset += w; - } - } - - new_string := string(buf[:offset]); - - append(&p.allocated_strings, new_string); - - return new_string, true; -} - - -allow_token :: proc(p: ^Parser, kind: Kind) -> bool { - if p.curr_token.kind == kind { - next_token(p); - return true; - } - return false; -} - -expect_token :: proc(p: ^Parser, kind: Kind) -> Token { - prev := p.curr_token; - if prev.kind != kind { - got := prev.lit; - if got == "\n" { - got = ";"; - } - error(p, prev.pos, "Expected %s, got %s", kind_to_string[kind], got); - } - next_token(p); - return prev; -} - -expect_operator :: proc(p: ^Parser) -> Token { - prev := p.curr_token; - if !is_operator(prev.kind) { - error(p, prev.pos, "Expected an operator, got %s", prev.lit); - } - - - next_token(p); - return prev; -} - -fix_advance :: proc(p: ^Parser) { - for { - #partial switch t := p.curr_token; t.kind { - case .EOF, .Semicolon: - return; - } - next_token(p); - } -} - -copy_value :: proc(value: Value) -> Value { - #partial switch v in value { - case Array: - a := make(Array, len(v)); - for elem, idx in v { - a[idx] = copy_value(elem); - } - return a; - case Dict: - d := make(Dict, cap(v)); - for key, val in v { - d[key] = copy_value(val); - } - return d; - } - return value; -} - -lookup_value :: proc(p: ^Parser, name: string) -> (Value, bool) { - for i := len(p.dict_stack)-1; i >= 0; i -= 1 { - d := p.dict_stack[i]; - if val, ok := d[name]; ok { - return copy_value(val), true; - } - } - - return nil, false; -} - -parse_operand :: proc(p: ^Parser) -> (Value, Pos) { - tok := p.curr_token; - #partial switch p.curr_token.kind { - case .Ident: - next_token(p); - v, ok := lookup_value(p, tok.lit); - if !ok { error(p, tok.pos, "Undeclared identifier %s", tok.lit); } - return v, tok.pos; - - case .True: - next_token(p); - return true, tok.pos; - case .False: - next_token(p); - return false, tok.pos; - - case .Nil: - next_token(p); - return Nil_Value{}, tok.pos; - - case .Integer: - next_token(p); - i, _ := strconv.parse_i64(tok.lit); - return i, tok.pos; - - case .Float: - next_token(p); - f, _ := strconv.parse_f64(tok.lit); - return f, tok.pos; - - case .String: - next_token(p); - str, ok := unquote_string(p, tok); - if !ok { error(p, tok.pos, "Unable to unquote string"); } - return string(str), tok.pos; - - case .Open_Paren: - expect_token(p, .Open_Paren); - expr, _ := parse_expr(p); - expect_token(p, .Close_Paren); - return expr, tok.pos; - - case .Open_Bracket: - expect_token(p, .Open_Bracket); - elems := make([dynamic]Value, 0, 4); - for p.curr_token.kind != .Close_Bracket && - p.curr_token.kind != .EOF { - elem, _ := parse_expr(p); - append(&elems, elem); - - if p.curr_token.kind == .Semicolon && p.curr_token.lit == "\n" { - next_token(p); - } else if !allow_token(p, .Comma) { - break; - } - - } - expect_token(p, .Close_Bracket); - return Array(elems[:]), tok.pos; - - case .Open_Brace: - expect_token(p, .Open_Brace); - - dict := Dict{}; - append(&p.dict_stack, &dict); - defer pop(&p.dict_stack); - - for p.curr_token.kind != .Close_Brace && - p.curr_token.kind != .EOF { - name_tok := p.curr_token; - if !allow_token(p, .Ident) && !allow_token(p, .String) { - name_tok = expect_token(p, .Ident); - } - - name, ok := unquote_string(p, name_tok); - if !ok { error(p, tok.pos, "Unable to unquote string"); } - expect_token(p, .Assign); - elem, _ := parse_expr(p); - - if _, ok2 := dict[name]; ok2 { - error(p, name_tok.pos, "Previous declaration of %s in this scope", name); - } else { - dict[name] = elem; - } - - if p.curr_token.kind == .Semicolon && p.curr_token.lit == "\n" { - next_token(p); - } else if !allow_token(p, .Comma) { - break; - } - } - expect_token(p, .Close_Brace); - return dict, tok.pos; - - } - return nil, tok.pos; -} - -parse_atom_expr :: proc(p: ^Parser, operand: Value, pos: Pos) -> (Value, Pos) { - loop := true; - for operand := operand; loop; { - #partial switch p.curr_token.kind { - case .Period: - next_token(p); - tok := next_token(p); - - #partial switch tok.kind { - case .Ident: - d, ok := operand.(Dict); - if !ok || d == nil { - error(p, tok.pos, "Expected a dictionary"); - operand = nil; - continue; - } - name, usok := unquote_string(p, tok); - if !usok { error(p, tok.pos, "Unable to unquote string"); } - val, found := d[name]; - if !found { - error(p, tok.pos, "Field %s not found in dictionary", name); - operand = nil; - continue; - } - operand = val; - case: - error(p, tok.pos, "Expected a selector, got %s", tok.kind); - operand = nil; - } - - case .Open_Bracket: - expect_token(p, .Open_Bracket); - index, index_pos := parse_expr(p); - expect_token(p, .Close_Bracket); - - - #partial switch a in operand { - case Array: - i, ok := index.(i64); - if !ok { - error(p, index_pos, "Index must be an integer for an array"); - operand = nil; - continue; - } - - if 0 <= i && i < i64(len(a)) { - operand = a[i]; - } else { - error(p, index_pos, "Index %d out of bounds range 0..%d", i, len(a)); - operand = nil; - continue; - } - - case Dict: - key, ok := index.(string); - if !ok { - error(p, index_pos, "Index must be a string for a dictionary"); - operand = nil; - continue; - } - - val, found := a[key]; - if found { - operand = val; - } else { - error(p, index_pos, "`%s` was not found in the dictionary", key); - operand = nil; - continue; - } - - - - case: - error(p, index_pos, "Indexing is only allowed on an array or dictionary"); - } - - case: - loop = false; - } - } - - return operand, pos; -} - -parse_unary_expr :: proc(p: ^Parser) -> (Value, Pos) { - op := p.curr_token; - #partial switch p.curr_token.kind { - case .At: - next_token(p); - tok := expect_token(p, .String); - v, ok := lookup_value(p, tok.lit); - if !ok { error(p, tok.pos, "Undeclared identifier %s", tok.lit); } - return parse_atom_expr(p, v, tok.pos); - - case .Add, .Sub: - next_token(p); - // TODO(bill): Calcuate values as you go! - expr, pos := parse_unary_expr(p); - - #partial switch e in expr { - case i64: if op.kind == .Sub { return -e, pos; } - case f64: if op.kind == .Sub { return -e, pos; } - case: - error(p, op.pos, "Unary operator %s can only be used on integers or floats", op.lit); - return nil, op.pos; - } - - return expr, op.pos; - - case .Not: - next_token(p); - expr, _ := parse_unary_expr(p); - if v, ok := expr.(bool); ok { - return !v, op.pos; - } - error(p, op.pos, "Unary operator %s can only be used on booleans", op.lit); - return nil, op.pos; - } - - return parse_atom_expr(p, parse_operand(p)); -} - - -value_order :: proc(v: Value) -> int { - #partial switch _ in v { - case bool, string: - return 1; - case i64: - return 2; - case f64: - return 3; - } - return 0; -} - -match_values :: proc(left, right: ^Value) -> bool { - if value_order(right^) < value_order(left^) { - return match_values(right, left); - } - - #partial switch x in left^ { - case: - right^ = left^; - case bool, string: - return true; - case i64: - #partial switch y in right^ { - case i64: - return true; - case f64: - left^ = f64(x); - return true; - } - - case f64: - #partial switch y in right { - case f64: - return true; - } - } - - return false; -} - -calculate_binary_value :: proc(p: ^Parser, op: Kind, a_, b_: Value) -> (Value, bool) { - // TODO(bill): Calculate value as you go! - x, y := a_, b_; - match_values(&x, &y); - - - #partial switch a in x { - case: return x, true; - - case bool: - b, ok := y.(bool); - if !ok { return nil, false; } - #partial switch op { - case .Eq: return a == b, true; - case .NotEq: return a != b, true; - case .And: return a && b, true; - case .Or: return a || b, true; - } - - case i64: - b, ok := y.(i64); - if !ok { return nil, false; } - #partial switch op { - case .Add: return a + b, true; - case .Sub: return a - b, true; - case .Mul: return a * b, true; - case .Quo: return a / b, true; - case .Rem: return a % b, true; - case .Eq: return a == b, true; - case .NotEq: return a != b, true; - case .Lt: return a < b, true; - case .Gt: return a > b, true; - case .LtEq: return a <= b, true; - case .GtEq: return a >= b, true; - } - - case f64: - b, ok := y.(f64); - if !ok { return nil, false; } - - #partial switch op { - case .Add: return a + b, true; - case .Sub: return a - b, true; - case .Mul: return a * b, true; - case .Quo: return a / b, true; - case .Eq: return a == b, true; - case .NotEq: return a != b, true; - case .Lt: return a < b, true; - case .Gt: return a > b, true; - case .LtEq: return a <= b, true; - case .GtEq: return a >= b, true; - } - - case string: - b, ok := y.(string); - if !ok { return nil, false; } - - #partial switch op { - case .Add: - n := len(a) + len(b); - data := make([]byte, n); - copy(data[:], a); - copy(data[len(a):], b); - s := string(data); - append(&p.allocated_strings, s); - return s, true; - - case .Eq: return a == b, true; - case .NotEq: return a != b, true; - case .Lt: return a < b, true; - case .Gt: return a > b, true; - case .LtEq: return a <= b, true; - case .GtEq: return a >= b, true; - } - } - - return nil, false; -} - -parse_binary_expr :: proc(p: ^Parser, prec_in: int) -> (Value, Pos) { - expr, pos := parse_unary_expr(p); - for prec := precedence(p.curr_token.kind); prec >= prec_in; prec -= 1 { - for { - op := p.curr_token; - op_prec := precedence(op.kind); - if op_prec != prec { - break; - } - expect_operator(p); - - if op.kind == .Question { - cond := expr; - x, _ := parse_expr(p); - expect_token(p, .Colon); - y, _ := parse_expr(p); - - if t, ok := cond.(bool); ok { - expr = t ? x : y; - } else { - error(p, pos, "Condition must be a boolean"); - } - - } else { - right, right_pos := parse_binary_expr(p, prec+1); - if right == nil { - error(p, right_pos, "Expected expression on the right-hand side of the binary operator %s", op.lit); - } - left := expr; - ok: bool; - expr, ok = calculate_binary_value(p, op.kind, left, right); - if !ok { - error(p, pos, "Invalid binary operation"); - } - } - } - } - return expr, pos; -} - -parse_expr :: proc(p: ^Parser) -> (Value, Pos) { - return parse_binary_expr(p, 1); -} - -expect_semicolon :: proc(p: ^Parser) { - kind := p.curr_token.kind; - - #partial switch kind { - case .Comma: - error(p, p.curr_token.pos, "Expected ';', got ','"); - next_token(p); - case .Semicolon: - next_token(p); - case .EOF: - // okay - case: - error(p, p.curr_token.pos, "Expected ';', got %s", p.curr_token.lit); - fix_advance(p); - } -} - -parse_assignment :: proc(p: ^Parser) -> bool { - top_dict :: proc(p: ^Parser) -> ^Dict { - assert(len(p.dict_stack) > 0); - return p.dict_stack[len(p.dict_stack)-1]; - } - - if p.curr_token.kind == .Semicolon { - next_token(p); - return true; - } - if p.curr_token.kind == .EOF { - return false; - } - - tok := p.curr_token; - if allow_token(p, .Ident) || allow_token(p, .String) { - expect_token(p, .Assign); - name, ok := unquote_string(p, tok); - if !ok { error(p, tok.pos, "Unable to unquote string"); } - expr, _ := parse_expr(p); - d := top_dict(p); - if _, ok2 := d[name]; ok2 { - error(p, tok.pos, "Previous declaration of %s", name); - } else { - d[name] = expr; - } - expect_semicolon(p); - return true; - } - error(p, tok.pos, "Expected an assignment, got %s", kind_to_string[tok.kind]); - fix_advance(p); - return false; -} diff --git a/core/encoding/cel/doc.odin b/core/encoding/cel/doc.odin deleted file mode 100644 index 2d224d292..000000000 --- a/core/encoding/cel/doc.odin +++ /dev/null @@ -1,51 +0,0 @@ -/* - package cel - - sample := ` - x = 123; - y = 321.456; - z = x * (y - 1) / 2; - w = "foo" + "bar"; - - # This is a comment - - asd = "Semicolons are optional" - - a = {id = {b = 123}} # Dict - b = a.id.b - - f = [1, 4, 9] # Array - g = f[2] - - h = x < y and w == "foobar" - i = h ? 123 : "google" - - j = nil - - "127.0.0.1" = "value" # Keys can be strings - - "foo" = { - "bar" = { - "baz" = 123, # optional commas if newline is present - "zab" = 456, - "abz" = 789, - }, - }; - - bar = @"foo"["bar"].baz - `; - - - main :: proc() { - p, ok := create_from_string(sample); - if !ok { - return; - } - defer destroy(p); - - if p.error_count == 0 { - print(p); - } - } -*/ -package cel diff --git a/core/encoding/cel/token.odin b/core/encoding/cel/token.odin deleted file mode 100644 index 47649b913..000000000 --- a/core/encoding/cel/token.odin +++ /dev/null @@ -1,523 +0,0 @@ -package cel - -import "core:fmt" -import "core:unicode/utf8" - -Kind :: enum { - Illegal, - EOF, - Comment, - - _literal_start, - Ident, - Integer, - Float, - Char, - String, - _literal_end, - - _keyword_start, - True, // true - False, // false - Nil, // nil - _keyword_end, - - - _operator_start, - Question, // ? - - And, // and - Or, // or - - Add, // + - Sub, // - - Mul, // * - Quo, // / - Rem, // % - - Not, // ! - - Eq, // == - NotEq, // != - Lt, // < - Gt, // > - LtEq, // <= - GtEq, // >= - - At, // @ - _operator_end, - - _punc_start, - Assign, // = - - Open_Paren, // ( - Close_Paren, // ) - Open_Bracket, // [ - Close_Bracket, // ] - Open_Brace, // { - Close_Brace, // } - - Colon, // : - Semicolon, // ; - Comma, // , - Period, // . - _punc_end, -} - - -Pos :: struct { - file: string, - line: int, - column: int, -} - -Token :: struct { - kind: Kind, - using pos: Pos, - lit: string, -} - -Tokenizer :: struct { - src: []byte, - - file: string, // May not be used - - curr_rune: rune, - offset: int, - read_offset: int, - line_offset: int, - line_count: int, - - insert_semi: bool, - - error_count: int, -} - - -keywords := map[string]Kind{ - "true" = .True, - "false" = .False, - "nil" = .Nil, - "and" = .And, - "or" = .Or, -}; - -kind_to_string := [len(Kind)]string{ - "illegal", - "EOF", - "comment", - - "", - "identifier", - "integer", - "float", - "character", - "string", - "", - - "", - "true", "false", "nil", - "", - - "", - "?", "and", "or", - "+", "-", "*", "/", "%", - "!", - "==", "!=", "<", ">", "<=", ">=", - "@", - "", - - "", - "=", - "(", ")", - "[", "]", - "{", "}", - ":", ";", ",", ".", - "", -}; - -precedence :: proc(op: Kind) -> int { - #partial switch op { - case .Question: - return 1; - case .Or: - return 2; - case .And: - return 3; - case .Eq, .NotEq, .Lt, .Gt, .LtEq, .GtEq: - return 4; - case .Add, .Sub: - return 5; - case .Mul, .Quo, .Rem: - return 6; - } - return 0; -} - - -token_lookup :: proc(ident: string) -> Kind { - if tok, is_keyword := keywords[ident]; is_keyword { - return tok; - } - return .Ident; -} - -is_literal :: proc(tok: Kind) -> bool { return ._literal_start < tok && tok < ._literal_end; } -is_operator :: proc(tok: Kind) -> bool { return ._operator_start < tok && tok < ._operator_end; } -is_keyword :: proc(tok: Kind) -> bool { return ._keyword_start < tok && tok < ._keyword_end; } - - -tokenizer_init :: proc(t: ^Tokenizer, src: []byte, file := "") { - t.src = src; - t.file = file; - t.curr_rune = ' '; - t.offset = 0; - t.read_offset = 0; - t.line_offset = 0; - t.line_count = 1; - - advance_to_next_rune(t); - if t.curr_rune == utf8.RUNE_BOM { - advance_to_next_rune(t); - } -} - -token_error :: proc(t: ^Tokenizer, msg: string, args: ..any) { - fmt.eprintf("%s(%d:%d) Error: ", t.file, t.line_count, t.read_offset-t.line_offset+1); - fmt.eprintf(msg, ..args); - fmt.eprintln(); - t.error_count += 1; -} - -advance_to_next_rune :: proc(t: ^Tokenizer) { - if t.read_offset < len(t.src) { - t.offset = t.read_offset; - if t.curr_rune == '\n' { - t.line_offset = t.offset; - t.line_count += 1; - } - r, w := rune(t.src[t.read_offset]), 1; - switch { - case r == 0: - token_error(t, "Illegal character NUL"); - case r >= utf8.RUNE_SELF: - r, w = utf8.decode_rune(t.src[t.read_offset:]); - if r == utf8.RUNE_ERROR && w == 1 { - token_error(t, "Illegal utf-8 encoding"); - } else if r == utf8.RUNE_BOM && t.offset > 0 { - token_error(t, "Illegal byte order mark"); - } - } - - t.read_offset += w; - t.curr_rune = r; - } else { - t.offset = len(t.src); - if t.curr_rune == '\n' { - t.line_offset = t.offset; - t.line_count += 1; - } - t.curr_rune = utf8.RUNE_EOF; - } -} - - -get_pos :: proc(t: ^Tokenizer) -> Pos { - return Pos { - file = t.file, - line = t.line_count, - column = t.offset - t.line_offset + 1, - }; -} - -is_letter :: proc(r: rune) -> bool { - switch r { - case 'a'..='z', 'A'..='Z', '_': - return true; - } - return false; -} - -is_digit :: proc(r: rune) -> bool { - switch r { - case '0'..='9': - return true; - } - return false; -} - -skip_whitespace :: proc(t: ^Tokenizer) { - loop: for { - switch t.curr_rune { - case '\n': - if t.insert_semi { - break loop; - } - fallthrough; - case ' ', '\t', '\r', '\v', '\f': - advance_to_next_rune(t); - - case: - break loop; - } - } -} - -scan_identifier :: proc(t: ^Tokenizer) -> string { - offset := t.offset; - for is_letter(t.curr_rune) || is_digit(t.curr_rune) { - advance_to_next_rune(t); - } - return string(t.src[offset : t.offset]); -} - -digit_value :: proc(r: rune) -> int { - switch r { - case '0'..='9': return int(r - '0'); - case 'a'..='f': return int(r - 'a' + 10); - case 'A'..='F': return int(r - 'A' + 10); - } - return 16; -} - -scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Kind, string) { - scan_mantissa :: proc(t: ^Tokenizer, base: int) { - for digit_value(t.curr_rune) < base || t.curr_rune == '_' { - advance_to_next_rune(t); - } - } - scan_exponent :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (kind: Kind, text: string) { - kind = tok; - if t.curr_rune == 'e' || t.curr_rune == 'E' { - kind = .Float; - advance_to_next_rune(t); - if t.curr_rune == '-' || t.curr_rune == '+' { - advance_to_next_rune(t); - } - if digit_value(t.curr_rune) < 10 { - scan_mantissa(t, 10); - } else { - token_error(t, "Illegal floating point exponent"); - } - } - text = string(t.src[offset : t.offset]); - return; - } - scan_fraction :: proc(t: ^Tokenizer, tok: Kind, offset: int) -> (kind: Kind, text: string) { - kind = tok; - if t.curr_rune == '.' { - kind = .Float; - advance_to_next_rune(t); - scan_mantissa(t, 10); - } - - return scan_exponent(t, kind, offset); - } - - offset := t.offset; - tok := Kind.Integer; - - if seen_decimal_point { - offset -= 1; - tok = .Float; - scan_mantissa(t, 10); - return scan_exponent(t, tok, offset); - } - - if t.curr_rune == '0' { - offset = t.offset; - advance_to_next_rune(t); - switch t.curr_rune { - case 'b', 'B': - advance_to_next_rune(t); - scan_mantissa(t, 2); - if t.offset - offset <= 2 { - token_error(t, "Illegal binary number"); - } - case 'o', 'O': - advance_to_next_rune(t); - scan_mantissa(t, 8); - if t.offset - offset <= 2 { - token_error(t, "Illegal octal number"); - } - case 'x', 'X': - advance_to_next_rune(t); - scan_mantissa(t, 16); - if t.offset - offset <= 2 { - token_error(t, "Illegal hexadecimal number"); - } - case: - scan_mantissa(t, 10); - switch t.curr_rune { - case '.', 'e', 'E': - return scan_fraction(t, tok, offset); - } - } - - return tok, string(t.src[offset:t.offset]); - } - - scan_mantissa(t, 10); - - return scan_fraction(t, tok, offset); -} - -scan :: proc(t: ^Tokenizer) -> Token { - skip_whitespace(t); - - offset := t.offset; - - tok: Kind; - pos := get_pos(t); - lit: string; - - insert_semi := false; - - - switch r := t.curr_rune; { - case is_letter(r): - insert_semi = true; - lit = scan_identifier(t); - tok = .Ident; - if len(lit) > 1 { - tok = token_lookup(lit); - } - - case '0' <= r && r <= '9': - insert_semi = true; - tok, lit = scan_number(t, false); - - case: - advance_to_next_rune(t); - switch r { - case -1: - if t.insert_semi { - t.insert_semi = false; - return Token{.Semicolon, pos, "\n"}; - } - return Token{.EOF, pos, "\n"}; - - case '\n': - t.insert_semi = false; - return Token{.Semicolon, pos, "\n"}; - - case '"': - insert_semi = true; - quote := r; - tok = .String; - for { - this_r := t.curr_rune; - if this_r == '\n' || r < 0 { - token_error(t, "String literal not terminated"); - break; - } - advance_to_next_rune(t); - if this_r == quote { - break; - } - // TODO(bill); Handle properly - if this_r == '\\' && t.curr_rune == quote { - advance_to_next_rune(t); - } - } - - lit = string(t.src[offset+1:t.offset-1]); - - - case '#': - for t.curr_rune != '\n' && t.curr_rune >= 0 { - advance_to_next_rune(t); - } - if t.insert_semi { - t.insert_semi = false; - return Token{.Semicolon, pos, "\n"}; - } - // Recursive! - return scan(t); - - case '?': tok = .Question; - case ':': tok = .Colon; - case '@': tok = .At; - - case ';': - tok = .Semicolon; - lit = ";"; - case ',': tok = .Comma; - - case '(': - tok = .Open_Paren; - case ')': - insert_semi = true; - tok = .Close_Paren; - - case '[': - tok = .Open_Bracket; - case ']': - insert_semi = true; - tok = .Close_Bracket; - - case '{': - tok = .Open_Brace; - case '}': - insert_semi = true; - tok = .Close_Brace; - - case '+': tok = .Add; - case '-': tok = .Sub; - case '*': tok = .Mul; - case '/': tok = .Quo; - case '%': tok = .Rem; - - case '!': - tok = .Not; - if t.curr_rune == '=' { - advance_to_next_rune(t); - tok = .NotEq; - } - - case '=': - tok = .Assign; - if t.curr_rune == '=' { - advance_to_next_rune(t); - tok = .Eq; - } - - case '<': - tok = .Lt; - if t.curr_rune == '=' { - advance_to_next_rune(t); - tok = .LtEq; - } - - case '>': - tok = .Gt; - if t.curr_rune == '=' { - advance_to_next_rune(t); - tok = .GtEq; - } - - case '.': - if '0' <= t.curr_rune && t.curr_rune <= '9' { - insert_semi = true; - tok, lit = scan_number(t, true); - } else { - tok = .Period; - } - - case: - if r != utf8.RUNE_BOM { - token_error(t, "Illegal character '%r'", r); - } - insert_semi = t.insert_semi; - tok = .Illegal; - } - } - - t.insert_semi = insert_semi; - - if lit == "" { - lit = string(t.src[offset:t.offset]); - } - - return Token{tok, pos, lit}; -}