From 1c76577918dc6bb7d3761501b0e137719c65accd Mon Sep 17 00:00:00 2001
From: gingerBill
Date: Sat, 3 Jul 2021 15:16:44 +0100
Subject: [PATCH] Add `slice.sort_by_cmp` Ordering based sorting algorithms

---
Review notes (text between the `---` separator and the first file header
is not part of the change and is ignored by `git am`):

  * is_sorted_cmp: the submitted test was `== .Equal`, which reported a
    slice as unsorted when two neighbours compared equal and accepted
    descending input.  A pair is out of order only when the later element
    compares .Less than the earlier one, so the test is now `== .Less`.
  * _quick_sort_cmp: the submitted guard was `if b-a > 12`, so the range
    was partitioned once and whatever remained in [a, b) went to the gap-6
    pass and insertion sort regardless of its length.  The guard is now
    `for b-a > 12`, which is what the reassignment of `a` / `b` after each
    recursive call is set up for.  The pre-existing _quick_sort_less
    (context lines of the -337 hunk) still has the `if`; context lines must
    match the target file, so it is left alone here and needs the same
    change in a separate patch.
  * reverse_sort_by / reverse_sort_by_cmp: dropped `where ORD(E)`.  The
    ordering comes from the supplied procedure, and sort_by / sort_by_cmp
    place no such constraint on E.
  * Added-comment typo "Shell short" corrected to "Shell sort".
  * Only `+` lines were touched, one for one, so every hunk header and the
    diffstat below are unchanged.  The post-image hash on the `index` line
    was computed before these edits.
  * NOTE(review): the last hunk header states 159 added lines, but the text
    under review carried 158; one trailing blank `+` line was restored so
    the hunk matches its header -- confirm against the original commit.
    Positions of whitespace-only context lines were likewise inferred from
    the hunk headers.

 core/slice/sort.odin | 237 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 229 insertions(+), 8 deletions(-)

diff --git a/core/slice/sort.odin b/core/slice/sort.odin
index 1e7051de5..2dd3bb852 100644
--- a/core/slice/sort.odin
+++ b/core/slice/sort.odin
@@ -5,6 +5,33 @@ _ :: intrinsics;
 
 ORD :: intrinsics.type_is_ordered;
 
+Ordering :: enum {
+	Less    = -1,
+	Equal   =  0,
+	Greater = +1,
+}
+
+cmp :: proc(a, b: $E) -> Ordering where ORD(E) {
+	switch {
+	case a < b:
+		return .Less;
+	case a > b:
+		return .Greater;
+	}
+	return .Equal;
+}
+
+cmp_proc :: proc($E: typeid) -> (proc(E, E) -> Ordering) where ORD(E) {
+	return proc(a, b: E) -> Ordering {
+		switch {
+		case a < b:
+			return .Less;
+		case a > b:
+			return .Greater;
+		}
+		return .Equal;
+	};
+}
 
 // sort sorts a slice
 // This sort is not guaranteed to be stable
@@ -21,7 +48,15 @@ sort :: proc(data: $T/[]$E) where ORD(E) {
 sort_by :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool) {
 	when size_of(E) != 0 {
 		if n := len(data); n > 1 {
-			_quick_sort_proc(data, 0, n, _max_depth(n), less);
+			_quick_sort_less(data, 0, n, _max_depth(n), less);
+		}
+	}
+}
+
+sort_by_cmp :: proc(data: $T/[]$E, cmp: proc(i, j: E) -> Ordering) {
+	when size_of(E) != 0 {
+		if n := len(data); n > 1 {
+			_quick_sort_cmp(data, 0, n, _max_depth(n), cmp);
 		}
 	}
 }
@@ -44,6 +79,16 @@ is_sorted_by :: proc(array: $T/[]$E, less: proc(i, j: E) -> bool) -> bool {
 	return true;
 }
 
+is_sorted_cmp :: proc(array: $T/[]$E, cmp: proc(i, j: E) -> Ordering) -> bool {
+	for i := len(array)-1; i > 0; i -= 1 {
+		if cmp(array[i], array[i-1]) == .Less { // later element sorts before its predecessor
+			return false;
+		}
+	}
+	return true;
+}
+
+
 
 reverse_sort :: proc(data: $T/[]$E) where ORD(E) {
 	sort_by(data, proc(i, j: E) -> bool {
@@ -52,6 +97,23 @@ reverse_sort :: proc(data: $T/[]$E) where ORD(E) {
 }
 
 
+reverse_sort_by :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool) {
+	context._internal = rawptr(less); // stash `less`; the wrapper proc below reads it back
+	sort_by(data, proc(i, j: E) -> bool {
+		k := (proc(i, j: E) -> bool)(context._internal);
+		return k(j, i); // arguments swapped to reverse the order
+	});
+}
+
+reverse_sort_by_cmp :: proc(data: $T/[]$E, cmp: proc(i, j: E) -> Ordering) {
+	context._internal = rawptr(cmp); // stash `cmp`; the wrapper proc below reads it back
+	sort_by_cmp(data, proc(i, j: E) -> Ordering {
+		k := (proc(i, j: E) -> Ordering)(context._internal);
+		return k(j, i); // arguments swapped to reverse the order
+	});
+}
+
+
 // TODO(bill): Should `sort_by_key` exist or is `sort_by` more than enough?
 sort_by_key :: proc(data: $T/[]$E, key: proc(E) -> $K) where ORD(K) {
 	context._internal = rawptr(key);
@@ -250,7 +312,7 @@ _heap_sort :: proc(data: $T/[]$E, a, b: int) where ORD(E) {
 
 
 @(private)
-_quick_sort_proc :: proc(data: $T/[]$E, a, b, max_depth: int, less: proc(i, j: E) -> bool) {
+_quick_sort_less :: proc(data: $T/[]$E, a, b, max_depth: int, less: proc(i, j: E) -> bool) {
 	median3 :: proc(data: T, m1, m0, m2: int, less: proc(i, j: E) -> bool) {
 		if less(data[m1], data[m0]) {
 			swap(data, m1, m0);
@@ -337,16 +399,16 @@ _quick_sort_proc :: proc(data: $T/[]$E, a, b, max_depth: int, less: proc(i, j: E
 
 	if b-a > 12 { // only use shell sort for lengths <= 12
 		if max_depth == 0 {
-			_heap_sort_proc(data, a, b, less);
+			_heap_sort_less(data, a, b, less);
 			return;
 		}
 		max_depth -= 1;
 		mlo, mhi := do_pivot(data, a, b, less);
 		if mlo-a < b-mhi {
-			_quick_sort_proc(data, a, mlo, max_depth, less);
+			_quick_sort_less(data, a, mlo, max_depth, less);
 			a = mhi;
 		} else {
-			_quick_sort_proc(data, mhi, b, max_depth, less);
+			_quick_sort_less(data, mhi, b, max_depth, less);
 			b = mlo;
 		}
 	}
@@ -357,12 +419,12 @@ _quick_sort_proc :: proc(data: $T/[]$E, a, b, max_depth: int, less: proc(i, j: E
 				swap(data, i, i-6);
 			}
 		}
-		_insertion_sort_proc(data, a, b, less);
+		_insertion_sort_less(data, a, b, less);
 	}
 }
 
 @(private)
-_insertion_sort_proc :: proc(data: $T/[]$E, a, b: int, less: proc(i, j: E) -> bool) {
+_insertion_sort_less :: proc(data: $T/[]$E, a, b: int, less: proc(i, j: E) -> bool) {
 	for i in a+1..<b {
 		for j := i; j > a && less(data[j], data[j-1]); j -= 1 {
 			swap(data, j, j-1);
@@ -371,7 +433,7 @@ _insertion_sort_proc :: proc(data: $T/[]$E, a, b: int, less: proc(i, j: E) -> bo
 }
 
 @(private)
-_heap_sort_proc :: proc(data: $T/[]$E, a, b: int, less: proc(i, j: E) -> bool) {
+_heap_sort_less :: proc(data: $T/[]$E, a, b: int, less: proc(i, j: E) -> bool) {
 	sift_down :: proc(data: T, lo, hi, first: int, less: proc(i, j: E) -> bool) {
 		root := lo;
 		for {
@@ -405,3 +467,162 @@ _heap_sort_proc :: proc(data: $T/[]$E, a, b: int, less: proc(i, j: E) -> bool) {
 
 
 
+
+
+
+@(private)
+_quick_sort_cmp :: proc(data: $T/[]$E, a, b, max_depth: int, cmp: proc(i, j: E) -> Ordering) {
+	median3 :: proc(data: T, m1, m0, m2: int, cmp: proc(i, j: E) -> Ordering) {
+		if cmp(data[m1], data[m0]) == .Less {
+			swap(data, m1, m0);
+		}
+		if cmp(data[m2], data[m1]) == .Less {
+			swap(data, m2, m1);
+			if cmp(data[m1], data[m0]) == .Less {
+				swap(data, m1, m0);
+			}
+		}
+	}
+
+	do_pivot :: proc(data: T, lo, hi: int, cmp: proc(i, j: E) -> Ordering) -> (midlo, midhi: int) {
+		m := int(uint(lo+hi)>>1);
+		if hi-lo > 40 {
+			s := (hi-lo)/8;
+			median3(data, lo, lo+s, lo+s*2, cmp);
+			median3(data, m, m-s, m+s, cmp);
+			median3(data, hi-1, hi-1-s, hi-1-s*2, cmp);
+		}
+		median3(data, lo, m, hi-1, cmp);
+
+		pivot := lo;
+		a, c := lo+1, hi-1;
+
+		for ; a < c && cmp(data[a], data[pivot]) == .Less; a += 1 {
+		}
+		b := a;
+
+		for {
+			for ; b < c && cmp(data[pivot], data[b]) >= .Equal; b += 1 { // data[b] <= pivot
+			}
+			for ; b < c && cmp(data[pivot], data[c-1]) == .Less; c -= 1 { // data[c-1] > pivot
+			}
+			if b >= c {
+				break;
+			}
+
+			swap(data, b, c-1);
+			b += 1;
+			c -= 1;
+		}
+
+		protect := hi-c < 5;
+		if !protect && hi-c < (hi-lo)/4 {
+			dups := 0;
+			if cmp(data[pivot], data[hi-1]) != .Less {
+				swap(data, c, hi-1);
+				c += 1;
+				dups += 1;
+			}
+			if cmp(data[b-1], data[pivot]) != .Less {
+				b -= 1;
+				dups += 1;
+			}
+
+			if cmp(data[m], data[pivot]) != .Less {
+				swap(data, m, b-1);
+				b -= 1;
+				dups += 1;
+			}
+			protect = dups > 1;
+		}
+		if protect {
+			for {
+				for ; a < b && cmp(data[b-1], data[pivot]) >= .Equal; b -= 1 {
+				}
+				for ; a < b && cmp(data[a], data[pivot]) == .Less; a += 1 {
+				}
+				if a >= b {
+					break;
+				}
+				swap(data, a, b-1);
+				a += 1;
+				b -= 1;
+			}
+		}
+		swap(data, pivot, b-1);
+		return b-1, c;
+	}
+
+
+	a, b, max_depth := a, b, max_depth;
+
+	for b-a > 12 { // only use shell sort for lengths <= 12
+		if max_depth == 0 {
+			_heap_sort_cmp(data, a, b, cmp);
+			return;
+		}
+		max_depth -= 1;
+		mlo, mhi := do_pivot(data, a, b, cmp);
+		if mlo-a < b-mhi {
+			_quick_sort_cmp(data, a, mlo, max_depth, cmp);
+			a = mhi; // keep looping on [mhi, b)
+		} else {
+			_quick_sort_cmp(data, mhi, b, max_depth, cmp);
+			b = mlo; // keep looping on [a, mlo)
+		}
+	}
+	if b-a > 1 {
+		// Shell sort with gap 6
+		for i in a+6..<b {
+			if cmp(data[i], data[i-6]) == .Less {
+				swap(data, i, i-6);
+			}
+		}
+		_insertion_sort_cmp(data, a, b, cmp);
+	}
+}
+
+@(private)
+_insertion_sort_cmp :: proc(data: $T/[]$E, a, b: int, cmp: proc(i, j: E) -> Ordering) {
+	for i in a+1..<b {
+		for j := i; j > a && cmp(data[j], data[j-1]) == .Less; j -= 1 {
+			swap(data, j, j-1);
+		}
+	}
+}
+
+@(private)
+_heap_sort_cmp :: proc(data: $T/[]$E, a, b: int, cmp: proc(i, j: E) -> Ordering) {
+	sift_down :: proc(data: T, lo, hi, first: int, cmp: proc(i, j: E) -> Ordering) {
+		root := lo;
+		for {
+			child := 2*root + 1;
+			if child >= hi {
+				break;
+			}
+			if child+1 < hi && cmp(data[first+child], data[first+child+1]) == .Less {
+				child += 1;
+			}
+			if cmp(data[first+root], data[first+child]) >= .Equal {
+				return;
+			}
+			swap(data, first+root, first+child);
+			root = child;
+		}
+	}
+
+
+	first, lo, hi := a, 0, b-a;
+
+	for i := (hi-1)/2; i >= 0; i -= 1 {
+		sift_down(data, i, hi, first, cmp);
+	}
+
+	for i := hi-1; i >= 0; i -= 1 {
+		swap(data, first, first+i);
+		sift_down(data, lo, i, first, cmp);
+	}
+}
+
+
+