From 77734ea967d620541eadc770280477cf1550892e Mon Sep 17 00:00:00 2001
From: gingerBill
Date: Sat, 27 Jul 2019 11:59:50 +0100
Subject: [PATCH] Improve the performance of simple array comparisons

---
Review notes (ignored when the patch is applied):
 * memory_compare loads only complete words that lie inside the n bytes;
   the previous `n/SU + 1` word count read up to one word past the end.
   A non-positive n now returns 0 before any memory is touched.
 * is_type_simple_compare no longer accepts float/complex element types,
   because bytewise equality disagrees with `==` for NaN and signed zero.
 * array_make is given its element type explicitly in src/ir.cpp.
 * The "index" blob ids are carried over from the original patch.

 core/runtime/internal.odin | 48 ++++++++++++++++++++++++++++++++++++++-
 src/checker.cpp            |  2 ++
 src/ir.cpp                 | 38 ++++++++++++++++++++-----------
 src/types.cpp              | 25 +++++++++++++++++++++
 4 files changed, 99 insertions(+), 14 deletions(-)

diff --git a/core/runtime/internal.odin b/core/runtime/internal.odin
index d4d7ab84d..90d653c71 100644
--- a/core/runtime/internal.odin
+++ b/core/runtime/internal.odin
@@ -243,6 +243,52 @@ print_type :: proc(fd: os.Handle, ti: ^Type_Info) {
 	}
 }
 
+// memory_compare reports how the first n bytes at a and b compare.
+// It returns 0 when every byte matches (or when n <= 0); otherwise -1 or +1,
+// decided by the first differing byte with both bytes treated as unsigned.
+// Complete uintptr-sized words are compared first and the leftover bytes one
+// at a time, so nothing outside the n bytes is ever loaded.
+// NOTE(review): the word loads are not alignment-checked; assumes the target
+// tolerates unaligned uintptr reads - confirm.
+memory_compare :: proc "contextless" (a, b: rawptr, n: int) -> int #no_bounds_check {
+	if n <= 0 {
+		return 0;
+	}
+
+	x := uintptr(a);
+	y := uintptr(b);
+	length := uintptr(n);
+
+	SU :: size_of(uintptr);
+	fast := length/SU; // number of complete words inside the range
+	offset := fast*SU; // first byte not covered by a complete word
+
+	for curr_block := uintptr(0); curr_block < fast; curr_block += 1 {
+		va := (^uintptr)(x + curr_block*SU)^;
+		vb := (^uintptr)(y + curr_block*SU)^;
+		if va ~ vb != 0 {
+			// The words differ, so a differing byte exists inside this word.
+			for pos := curr_block*SU; pos < length; pos += 1 {
+				ba := (^byte)(x+pos)^;
+				bb := (^byte)(y+pos)^;
+				if ba ~ bb != 0 {
+					return (int(ba) - int(bb)) < 0 ? -1 : +1;
+				}
+			}
+		}
+	}
+
+	for /**/; offset < length; offset += 1 {
+		ba := (^byte)(x+offset)^;
+		bb := (^byte)(y+offset)^;
+		if ba ~ bb != 0 {
+			return (int(ba) - int(bb)) < 0 ? -1 : +1;
+		}
+	}
+
+	return 0;
+}
+
 string_eq :: proc "contextless" (a, b: string) -> bool {
 	switch {
 	case len(a) != len(b): return false;
@@ -253,7 +299,7 @@ string_eq :: proc "contextless" (a, b: string) -> bool {
 }
 
 string_cmp :: proc "contextless" (a, b: string) -> int {
-	return mem.compare_byte_ptrs(&a[0], &b[0], min(len(a), len(b)));
+	return memory_compare(&a[0], &b[0], min(len(a), len(b)));
 }
 
 string_ne :: inline proc "contextless" (a, b: string) -> bool { return !string_eq(a, b); }
diff --git a/src/checker.cpp b/src/checker.cpp
index f68e2ab15..ba13b2237 100644
--- a/src/checker.cpp
+++ b/src/checker.cpp
@@ -1613,6 +1613,8 @@ void generate_minimum_dependency_set(Checker *c, Entity *start) {
 
 		str_lit("umodti3"),
 		str_lit("udivti3"),
+
+		str_lit("memory_compare"),
 	};
 	for (isize i = 0; i < gb_count_of(required_runtime_entities); i++) {
 		add_dependency_to_set(c, scope_lookup(c->info.runtime_package->scope, required_runtime_entities[i]));
diff --git a/src/ir.cpp b/src/ir.cpp
index 9f95db524..1105eac29 100644
--- a/src/ir.cpp
+++ b/src/ir.cpp
@@ -4126,20 +4126,32 @@ irValue *ir_emit_comp(irProcedure *proc, TokenKind op_kind, irValue *left, irVal
 
 			return ir_emit_load(proc, val);
 		} else {
-			irValue *val = ir_add_local_generated(proc, t_bool, false);
-			ir_emit_store(proc, val, res);
-			auto loop_data = ir_loop_start(proc, count, t_i32);
-			{
-				irValue *i = loop_data.idx;
-				irValue *x = ir_emit_load(proc, ir_emit_array_ep(proc, lhs, i));
-				irValue *y = ir_emit_load(proc, ir_emit_array_ep(proc, rhs, i));
-				irValue *cmp = ir_emit_comp(proc, op_kind, x, y);
-				irValue *new_res = ir_emit_arith(proc, cmp_op, ir_emit_load(proc, val), cmp, t_bool);
-				ir_emit_store(proc, val, ir_emit_conv(proc, new_res, t_bool));
-			}
-			ir_loop_end(proc, loop_data);
+			if (is_type_simple_compare(tl) && (op_kind == Token_CmpEq || op_kind == Token_NotEq)) {
+				// Bitwise-comparable elements: one runtime call replaces the element loop.
+				// TODO(bill): Test to see if this is actually faster!!!!
+				auto args = array_make<irValue *>(heap_allocator(), 3);
+				args[0] = ir_emit_conv(proc, lhs, t_rawptr);
+				args[1] = ir_emit_conv(proc, rhs, t_rawptr);
+				args[2] = ir_const_int(type_size_of(tl));
+				irValue *val = ir_emit_runtime_call(proc, "memory_compare", args);
+				irValue *res = ir_emit_comp(proc, op_kind, val, v_zero);
+				return ir_emit_conv(proc, res, t_bool);
+			} else {
+				irValue *val = ir_add_local_generated(proc, t_bool, false);
+				ir_emit_store(proc, val, res);
+				auto loop_data = ir_loop_start(proc, count, t_i32);
+				{
+					irValue *i = loop_data.idx;
+					irValue *x = ir_emit_load(proc, ir_emit_array_ep(proc, lhs, i));
+					irValue *y = ir_emit_load(proc, ir_emit_array_ep(proc, rhs, i));
+					irValue *cmp = ir_emit_comp(proc, op_kind, x, y);
+					irValue *new_res = ir_emit_arith(proc, cmp_op, ir_emit_load(proc, val), cmp, t_bool);
+					ir_emit_store(proc, val, ir_emit_conv(proc, new_res, t_bool));
+				}
+				ir_loop_end(proc, loop_data);
 
-			return ir_emit_load(proc, val);
+				return ir_emit_load(proc, val);
+			}
 		}
 	}
 
diff --git a/src/types.cpp b/src/types.cpp
index dc7ecb946..5cf86d6b6 100644
--- a/src/types.cpp
+++ b/src/types.cpp
@@ -1037,6 +1037,31 @@ Type *core_array_type(Type *t) {
 	return t;
 }
 
+// NOTE(bill): type can be easily compared using memcmp
+// Floating-point and complex types are excluded: NaN never equals itself and
+// +0.0 equals -0.0, so their bit patterns do not decide `==`/`!=`.
+bool is_type_simple_compare(Type *t) {
+	t = core_type(t);
+	switch (t->kind) {
+	case Type_Array:
+		return is_type_simple_compare(t->Array.elem);
+
+	case Type_Basic:
+		if (t->Basic.flags & (BasicFlag_Integer|BasicFlag_Rune|BasicFlag_Pointer)) {
+			return true;
+		}
+		return false;
+
+	case Type_Pointer:
+	case Type_Proc:
+	case Type_BitSet:
+	case Type_BitField:
+		return true;
+	}
+
+	return false;
+}
+
 Type *base_complex_elem_type(Type *t) {
 	t = core_type(t);
 	if (is_type_complex(t)) {