diff --git a/core/bufio/scanner.odin b/core/bufio/scanner.odin new file mode 100644 index 000000000..1dd13f19a --- /dev/null +++ b/core/bufio/scanner.odin @@ -0,0 +1,340 @@ +package bufio + +import "core:bytes" +import "core:io" +import "core:mem" +import "core:unicode/utf8" +import "intrinsics" + +// Extra errors returned by scanning procedures +Scanner_Extra_Error :: enum i32 { + Negative_Advance, + Advanced_Too_Far, + Bad_Read_Count, + Too_Long, + Too_Short, +} + +Scanner_Error :: union { + io.Error, + Scanner_Extra_Error, +} + +// Split_Proc is the signature of the split procedure used to tokenize the input. It receives the unprocessed data and at_eof, which scanner_scan sets once an error (including EOF) has been recorded, and returns the number of bytes to advance, the next token (nil when no token is available yet), an optional error, and whether the token is the final one. +Split_Proc :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool); + +Scanner :: struct { + r: io.Reader, + split: Split_Proc, + + buf: [dynamic]byte, + max_token_size: int, + start: int, + end: int, + token: []byte, + + _err: Scanner_Error, + max_consecutive_empty_reads: int, + successive_empty_token_count: int, + scan_called: bool, + done: bool, +} + +DEFAULT_MAX_SCAN_TOKEN_SIZE :: 1<<16; + +@(private) +_INIT_BUF_SIZE :: 4096; + +scanner_init :: proc(s: ^Scanner, r: io.Reader, buf_allocator := context.allocator) -> ^Scanner { // Sets s up to read from r with scan_lines as the split procedure and buf_allocator as the allocator of its buffer; returns s + s.r = r; + s.split = scan_lines; + s.max_token_size = DEFAULT_MAX_SCAN_TOKEN_SIZE; + s.buf.allocator = buf_allocator; + return s; +} +scanner_init_with_buffer :: proc(s: ^Scanner, r: io.Reader, buf: []byte) -> ^Scanner { // Like scanner_init, but builds the scanner's buffer from buf via mem.buffer_from_slice and resizes it to its full capacity + s.r = r; + s.split = scan_lines; + s.max_token_size = DEFAULT_MAX_SCAN_TOKEN_SIZE; + s.buf = mem.buffer_from_slice(buf); + resize(&s.buf, cap(s.buf)); + return s; +} +scanner_destroy :: proc(s: ^Scanner) { + delete(s.buf); +} + + +// Returns the first non-EOF error that was encountered by the scanner +scanner_error :: proc(s: ^Scanner) -> Scanner_Error { + switch s._err { + case .EOF, .None: + return nil; + } + return s._err; +} + +// Returns the most recent token created by scanner_scan. 
+// The underlying array may point to data that may be overwritten +// by another call to scanner_scan. +// Treat the returned value as if it is immutable. +scanner_bytes :: proc(s: ^Scanner) -> []byte { + return s.token; +} + +// Returns the most recent token created by scanner_scan as a string. +// The underlying array may point to data that may be overwritten +// by another call to scanner_scan. +// Treat the returned value as if it is immutable. +scanner_text :: proc(s: ^Scanner) -> string { + return string(s.token); +} + +// scanner_scan advances the scanner to the next token, which is then available through scanner_bytes or scanner_text. It returns true when a token was produced and false when scanning has finished, either because the input ended or because an error was recorded; scanner_error reports any non-EOF error. +scanner_scan :: proc(s: ^Scanner) -> bool { + set_err :: proc(s: ^Scanner, err: Scanner_Error) { // Records err (with .None treated as nil) only while no error other than EOF has been recorded + err := err; + if err == .None { + err = nil; + } + switch s._err { + case nil, .EOF: + s._err = err; + } + } + + if s.done { + return false; + } + s.scan_called = true; + + for { + // Check if a token is possible with what is available + // Once an error (including EOF) has been recorded, the split procedure is still called so it can return any remaining token + if s.start < s.end || s._err != nil { + advance, token, err, final_token := s.split(s.buf[s.start:s.end], s._err != nil); + if final_token { + s.token = token; + s.done = true; + return true; + } + if err != nil { + set_err(s, err); + return false; + } + + // Validate the advance returned by the split procedure before applying it + if advance < 0 { + set_err(s, .Negative_Advance); + return false; + } + if advance > s.end-s.start { + set_err(s, .Advanced_Too_Far); + return false; + } + s.start += advance; + + s.token = token; + if s.token != nil { + if s._err == nil || advance > 0 { + s.successive_empty_token_count = 0; + } else { + s.successive_empty_token_count += 1; + + if s.max_consecutive_empty_reads <= 0 { + s.max_consecutive_empty_reads = DEFAULT_MAX_CONSECUTIVE_EMPTY_READS; + } + if s.successive_empty_token_count > s.max_consecutive_empty_reads { + set_err(s, .No_Progress); + return false; + } + } + return true; + } + } + + // If an error is hit, no token can be created + if s._err != nil { + s.start = 0; + s.end = 0; + return false; + } + + // No token yet, so more data must be read; first move the unread data to the front of the buffer when there is no room at the end or more than half of the buffer has been consumed + if 
s.start > 0 && (s.end == len(s.buf) || s.start > len(s.buf)/2) { + copy(s.buf[:], s.buf[s.start:s.end]); + s.end -= s.start; + s.start = 0; + } + + could_be_too_short := false; + + // Grow the buffer if it is full, failing with .Too_Long once it has reached max_token_size + if s.end == len(s.buf) { + if s.max_token_size <= 0 { + s.max_token_size = DEFAULT_MAX_SCAN_TOKEN_SIZE; + } + if len(s.buf) >= s.max_token_size { + set_err(s, .Too_Long); + return false; + } + // Start at _INIT_BUF_SIZE for an empty buffer, otherwise double the length with an overflow check + new_size := _INIT_BUF_SIZE; + if len(s.buf) > 0 { + overflowed: bool; + if new_size, overflowed = intrinsics.overflow_mul(len(s.buf), 2); overflowed { + set_err(s, .Too_Long); + return false; + } + } + + old_size := len(s.buf); + new_size = min(new_size, s.max_token_size); + resize(&s.buf, new_size); + s.end -= s.start; + s.start = 0; + + could_be_too_short = old_size >= len(s.buf); + + } + + // Read data into the free space at the end of the buffer, giving up after more than max_consecutive_empty_reads reads that return no data + loop := 0; + for { + n, err := io.read(s.r, s.buf[s.end:len(s.buf)]); + if n < 0 || len(s.buf)-s.end < n { + set_err(s, .Bad_Read_Count); + break; + } + s.end += n; + if err != nil { + set_err(s, err); + break; + } + if n > 0 { + s.successive_empty_token_count = 0; + break; + } + loop += 1; + + if s.max_consecutive_empty_reads <= 0 { + s.max_consecutive_empty_reads = DEFAULT_MAX_CONSECUTIVE_EMPTY_READS; + } + if loop > s.max_consecutive_empty_reads { + if could_be_too_short { + set_err(s, .Too_Short); + } else { + set_err(s, .No_Progress); + } + break; + } + } + } +} + +scan_bytes :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) { // Split procedure that returns each byte as a token + if at_eof && len(data) == 0 { + return; + } + return 1, data[0:1], nil, false; +} + +scan_runes :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) { // Split procedure that returns each UTF-8 encoded rune as a token; when no multi-byte rune is decoded and the data is a full rune or at_eof is set, one byte is consumed and ERROR_RUNE is returned instead + if at_eof && len(data) == 0 { + return; + } + + if data[0] < utf8.RUNE_SELF { + advance = 1; + token = data[0:1]; + return; + } + + _, width := utf8.decode_rune(data); + if width > 1 { + advance = width; + token = data[0:width]; + return; 
+ } + + if !at_eof && !utf8.full_rune(data) { + return; + } + + @thread_local ERROR_RUNE := []byte{0xef, 0xbf, 0xbd}; + + advance = 1; + token = ERROR_RUNE; + return; +} + +scan_words :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) { // Split procedure that returns each space-separated word as a token, with the surrounding spaces removed + is_space :: proc "contextless" (r: rune) -> bool { + switch r { + // lower ones + case ' ', '\t', '\n', '\v', '\f', '\r': + return true; + case '\u0085', '\u00a0': + return true; + // higher ones + case '\u2000' ..= '\u200a': + return true; + case '\u1680', '\u2028', '\u2029', '\u202f', '\u205f', '\u3000': + return true; + } + return false; + } + + // skip spaces at the beginning + start := 0; + for width := 0; start < len(data); start += width { + r: rune; + r, width = utf8.decode_rune(data[start:]); + if !is_space(r) { + break; + } + } + + for width, i := 0, start; i < len(data); i += width { // Scan until the next space, which marks the end of the word + r: rune; + r, width = utf8.decode_rune(data[i:]); + if is_space(r) { + advance = i+width; + token = data[start:i]; + return; + } + } + + if at_eof && len(data) > start { + advance = len(data); + token = data[start:]; + return; + } + + advance = start; + return; +} + +scan_lines :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) { // Split procedure that returns each line as a token without its '\n' and without a '\r' directly before it; at_eof the remaining data is returned as the last line even without a newline + trim_carriage_return :: proc "contextless" (data: []byte) -> []byte { + if len(data) > 0 && data[len(data)-1] == '\r' { + return data[0:len(data)-1]; + } + return data; + } + + if at_eof && len(data) == 0 { + return; + } + if i := bytes.index_byte(data, '\n'); i >= 0 { + advance = i+1; + token = trim_carriage_return(data[0:i]); + return; + } + + if at_eof { + advance = len(data); + token = trim_carriage_return(data); + } + return; +} diff --git a/src/check_expr.cpp b/src/check_expr.cpp index 16a574b3d..537fb73cc 100644 --- a/src/check_expr.cpp +++ b/src/check_expr.cpp @@ -6557,10 +6557,54 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type break; // NOTE(bill): No 
need to init } if (t->Struct.is_raw_union) { - if (cl->elems.count != 0) { - gbString type_str = type_to_string(type); - error(node, "Illegal compound literal type '%s'", type_str); - gb_string_free(type_str); + if (cl->elems.count > 0) { + // NOTE: unions cannot be constant + is_constant = false; + + if (cl->elems[0]->kind != Ast_FieldValue) { + gbString type_str = type_to_string(type); + error(node, "%s ('struct #raw_union') compound literals are only allowed to contain 'field = value' elements", type_str); + gb_string_free(type_str); + } else { + if (cl->elems.count != 1) { + gbString type_str = type_to_string(type); + error(node, "%s ('struct #raw_union') compound literals are only allowed to contain up to 1 'field = value' element, got %td", type_str, cl->elems.count); + gb_string_free(type_str); + } else { + Ast *elem = cl->elems[0]; + ast_node(fv, FieldValue, elem); + if (fv->field->kind != Ast_Ident) { + gbString expr_str = expr_to_string(fv->field); + error(elem, "Invalid field name '%s' in structure literal", expr_str); + gb_string_free(expr_str); + break; + } + + String name = fv->field->Ident.token.string; + + Selection sel = lookup_field(type, name, o->mode == Addressing_Type); + bool is_unknown = sel.entity == nullptr; + if (is_unknown) { + error(elem, "Unknown field '%.*s' in structure literal", LIT(name)); + break; + } + + if (sel.index.count > 1) { + error(elem, "Cannot assign to an anonymous field '%.*s' in a structure literal (at the moment)", LIT(name)); + break; + } + + Entity *field = t->Struct.fields[sel.index[0]]; + add_entity_use(c, fv->field, field); + + Operand o = {}; + check_expr_or_type(c, &o, fv->value, field->type); + + + check_assignment(c, &o, field->type, str_lit("structure literal")); + } + + } } break; } diff --git a/src/llvm_backend.cpp b/src/llvm_backend.cpp index 79364b0eb..a9a9ad0ac 100644 --- a/src/llvm_backend.cpp +++ b/src/llvm_backend.cpp @@ -3601,7 +3601,7 @@ void lb_mem_zero_ptr_internal(lbProcedure *p, LLVMValueRef 
ptr, LLVMValueRef len lb_type(p->module, t_int) }; unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name)); - GB_ASSERT_MSG(id != 0, "Unable to find %s.%s.%s.%s", name, LLVMPrintTypeToString(types[0]), LLVMPrintTypeToString(types[1]), LLVMPrintTypeToString(types[2])); + GB_ASSERT_MSG(id != 0, "Unable to find %s.%s.%s", name, LLVMPrintTypeToString(types[0]), LLVMPrintTypeToString(types[1])); LLVMValueRef ip = LLVMGetIntrinsicDeclaration(p->module->mod, id, types, gb_count_of(types)); LLVMValueRef args[4] = {}; @@ -6839,6 +6839,10 @@ lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bool allow_loc return lb_const_nil(m, original_type); } + if (is_type_raw_union(type)) { + return lb_const_nil(m, original_type); + } + isize offset = 0; if (type->Struct.custom_align > 0) { offset = 1; @@ -11141,26 +11145,27 @@ lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind, lbValue x) { return res; } } else if (is_type_slice(t)) { - lbValue len = lb_emit_struct_ev(p, x, 1); + lbValue data = lb_emit_struct_ev(p, x, 0); if (op_kind == Token_CmpEq) { - res.value = LLVMBuildIsNull(p->builder, len.value, ""); + res.value = LLVMBuildIsNull(p->builder, data.value, ""); return res; } else if (op_kind == Token_NotEq) { - res.value = LLVMBuildIsNotNull(p->builder, len.value, ""); + res.value = LLVMBuildIsNotNull(p->builder, data.value, ""); return res; } } else if (is_type_dynamic_array(t)) { - lbValue cap = lb_emit_struct_ev(p, x, 2); + lbValue data = lb_emit_struct_ev(p, x, 0); if (op_kind == Token_CmpEq) { - res.value = LLVMBuildIsNull(p->builder, cap.value, ""); + res.value = LLVMBuildIsNull(p->builder, data.value, ""); return res; } else if (op_kind == Token_NotEq) { - res.value = LLVMBuildIsNotNull(p->builder, cap.value, ""); + res.value = LLVMBuildIsNotNull(p->builder, data.value, ""); return res; } } else if (is_type_map(t)) { - lbValue cap = lb_map_cap(p, x); - return lb_emit_comp(p, op_kind, cap, lb_zero(p->module, cap.type)); + lbValue hashes = 
lb_emit_struct_ev(p, x, 0); + lbValue data = lb_emit_struct_ev(p, hashes, 0); + return lb_emit_comp(p, op_kind, data, lb_zero(p->module, data.type)); } else if (is_type_union(t)) { if (type_size_of(t) == 0) { if (op_kind == Token_CmpEq) { @@ -11181,21 +11186,35 @@ lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind, lbValue x) { } else if (is_type_soa_struct(t)) { Type *bt = base_type(t); if (bt->Struct.soa_kind == StructSoa_Slice) { - lbValue len = lb_soa_struct_len(p, x); + LLVMValueRef the_value = {}; + if (bt->Struct.fields.count == 0) { + lbValue len = lb_soa_struct_len(p, x); + the_value = len.value; + } else { + lbValue first_field = lb_emit_struct_ev(p, x, 0); + the_value = first_field.value; + } if (op_kind == Token_CmpEq) { - res.value = LLVMBuildIsNull(p->builder, len.value, ""); + res.value = LLVMBuildIsNull(p->builder, the_value, ""); return res; } else if (op_kind == Token_NotEq) { - res.value = LLVMBuildIsNotNull(p->builder, len.value, ""); + res.value = LLVMBuildIsNotNull(p->builder, the_value, ""); return res; } } else if (bt->Struct.soa_kind == StructSoa_Dynamic) { - lbValue cap = lb_soa_struct_cap(p, x); + LLVMValueRef the_value = {}; + if (bt->Struct.fields.count == 0) { + lbValue cap = lb_soa_struct_cap(p, x); + the_value = cap.value; + } else { + lbValue first_field = lb_emit_struct_ev(p, x, 0); + the_value = first_field.value; + } if (op_kind == Token_CmpEq) { - res.value = LLVMBuildIsNull(p->builder, cap.value, ""); + res.value = LLVMBuildIsNull(p->builder, the_value, ""); return res; } else if (op_kind == Token_NotEq) { - res.value = LLVMBuildIsNotNull(p->builder, cap.value, ""); + res.value = LLVMBuildIsNotNull(p->builder, the_value, ""); return res; } } @@ -13449,6 +13468,8 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) { TypeStruct *st = &bt->Struct; if (cl->elems.count > 0) { lb_addr_store(p, v, lb_const_value(p->module, type, exact_value_compound(expr))); + lbValue comp_lit_ptr = lb_addr_get_ptr(p, v); + 
for_array(field_index, cl->elems) { Ast *elem = cl->elems[field_index]; @@ -13477,6 +13498,12 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) { field_expr = lb_build_expr(p, elem); + lbValue gep = {}; + if (is_raw_union) { + gep = lb_emit_conv(p, comp_lit_ptr, alloc_type_pointer(ft)); + } else { + gep = lb_emit_struct_ep(p, comp_lit_ptr, cast(i32)index); + } Type *fet = field_expr.type; GB_ASSERT(fet->kind != Type_Tuple); @@ -13485,11 +13512,9 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) { if (is_type_union(ft) && !are_types_identical(fet, ft) && !is_type_untyped(fet)) { GB_ASSERT_MSG(union_variant_index(ft, fet) > 0, "%s", type_to_string(fet)); - lbValue gep = lb_emit_struct_ep(p, lb_addr_get_ptr(p, v), cast(i32)index); lb_emit_store_union_variant(p, gep, field_expr, fet); } else { lbValue fv = lb_emit_conv(p, field_expr, ft); - lbValue gep = lb_emit_struct_ep(p, lb_addr_get_ptr(p, v), cast(i32)index); lb_emit_store(p, gep, fv); } }