diff --git a/core/slice/sort.odin b/core/slice/sort.odin index 3b4119afa..b613ff1ed 100644 --- a/core/slice/sort.odin +++ b/core/slice/sort.odin @@ -6,6 +6,8 @@ Ordering :: enum { Greater = +1, } +Generic_Cmp :: #type proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering + @(require_results) cmp :: proc(a, b: $E) -> Ordering where ORD(E) { switch { @@ -35,7 +37,16 @@ cmp_proc :: proc($E: typeid) -> (proc(E, E) -> Ordering) where ORD(E) { sort :: proc(data: $T/[]$E) where ORD(E) { when size_of(E) != 0 { if n := len(data); n > 1 { - _quick_sort_general(data, 0, n, _max_depth(n), struct{}{}, .Ordered) + raw := ([^]byte)(raw_data(data)) + _smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering { + x, y := (^E)(lhs)^, (^E)(rhs)^ + if x < y { + return .Less + } else if x > y { + return .Greater + } + return .Equal + }, nil) } } } @@ -79,7 +90,18 @@ sort_with_indices :: proc(data: $T/[]$E, allocator := context.allocator) -> (ind for _, idx in indices { indices[idx] = idx } - _quick_sort_general_with_indices(data, indices, 0, n, _max_depth(n), struct{}{}, .Ordered) + raw := ([^]byte)(raw_data(indices)) + _smoothsort(raw, uint(len(indices)), size_of(int), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering { + data := ([^]E)(user_data) + xi, yi := (^int)(lhs)^, (^int)(rhs)^ + x, y := data[xi], data[yi] + if x < y { + return .Less + } else if x > y { + return .Greater + } + return .Equal + }, raw_data(data)) } return indices } @@ -91,7 +113,39 @@ sort_with_indices :: proc(data: $T/[]$E, allocator := context.allocator) -> (ind sort_by :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool) { when size_of(E) != 0 { if n := len(data); n > 1 { - _quick_sort_general(data, 0, n, _max_depth(n), less, .Less) + raw := ([^]byte)(raw_data(data)) + _smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering { + x, y := (^E)(lhs)^, (^E)(rhs)^ + less := (proc(E, E) -> bool)(user_data) + switch { + case less(x, y): return .Less + case less(y, x): return .Greater + } + return .Equal + }, rawptr(less)) + } + } +} + +sort_by_with_data :: proc(data: $T/[]$E, less: proc(i, j: E, user_data: rawptr) -> bool, user_data: rawptr) { + when size_of(E) != 0 { + if n := len(data); n > 1 { + Context :: struct { + less: proc(i, j: E, user_data: rawptr) -> bool, + user_data: rawptr, + } + ctx := &Context{less, user_data} + + raw := ([^]byte)(raw_data(data)) + _smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering { + x, y := (^E)(lhs)^, (^E)(rhs)^ + ctx := (^Context)(user_data) + switch { + case ctx.less(x, y, ctx.user_data): return .Less + case ctx.less(y, x, ctx.user_data): return .Greater + } + return .Equal + }, ctx) } } } @@ -105,8 +159,55 @@ sort_by_with_indices :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool, allocat for _, idx in indices { indices[idx] = idx } - _quick_sort_general_with_indices(data, indices, 0, n, _max_depth(n), less, .Less) - return indices + + Context :: struct{ + less: proc(i, j: E) -> bool, + data: T, + } + ctx := &Context{less, data} + + raw := ([^]byte)(raw_data(indices)) + _smoothsort(raw, uint(len(indices)), size_of(int), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering { + ctx := (^Context)(user_data) + xi, yi := (^int)(lhs)^, (^int)(rhs)^ + x, y := ctx.data[xi], ctx.data[yi] + switch { + case ctx.less(x, y): return .Less + case ctx.less(y, x): return .Greater + } + return .Equal + }, ctx) + } + } + return indices +} + +sort_by_with_indices_with_data :: proc(data: $T/[]$E, less: proc(i, j: E, user_data: rawptr) -> bool, user_data: rawptr, allocator := context.allocator) -> (indices : []int) { + indices = make([]int, len(data), allocator) + when size_of(E) != 0 { + if n := len(data); n > 1 { + for _, idx in indices { + indices[idx] = idx + } + + Context :: struct{ + less: proc(i, j: E, user_data: rawptr) -> bool, + data: T, + user_data: rawptr, + } + ctx := &Context{less, data, user_data} + + raw := ([^]byte)(raw_data(indices)) + _smoothsort(raw, uint(len(indices)), size_of(int), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering { + ctx := (^Context)(user_data) + xi, yi := (^int)(lhs)^, (^int)(rhs)^ + x, y := ctx.data[xi], ctx.data[yi] + switch { + case ctx.less(x, y, ctx.user_data): return .Less + case ctx.less(y, x, ctx.user_data): return .Greater + } + return .Equal + }, ctx) } } return indices @@ -115,11 +216,47 @@ sort_by_with_indices :: proc(data: $T/[]$E, less: proc(i, j: E) -> bool, allocat sort_by_cmp :: proc(data: $T/[]$E, cmp: proc(i, j: E) -> Ordering) { when size_of(E) != 0 { if n := len(data); n > 1 { - _quick_sort_general(data, 0, n, _max_depth(n), cmp, .Cmp) + raw := ([^]byte)(raw_data(data)) + _smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering { + x, y := (^E)(lhs)^, (^E)(rhs)^ + cmp := cast(proc(E, E) -> Ordering)(user_data) + return cmp(x, y) + }, rawptr(cmp)) } } } + +sort_by_cmp_with_data :: proc(data: $T/[]$E, cmp: proc(i, j: E, user_data: rawptr) -> Ordering, user_data: rawptr) { + when size_of(E) != 0 { + if n := len(data); n > 1 { + Context :: struct{ + cmp: proc(i, j: E, user_data: rawptr) -> Ordering, + user_data: rawptr, + } + ctx := &Context{cmp, user_data} + + raw := ([^]byte)(raw_data(data)) + _smoothsort(raw, uint(len(data)), size_of(E), proc(lhs, rhs: rawptr, user_data: rawptr) -> Ordering { + x, y := (^E)(lhs)^, (^E)(rhs)^ + ctx := (^Context)(user_data) + return ctx.cmp(x, y, ctx.user_data) + }, ctx) + } + } +} + + +sort_by_generic_cmp :: proc(data: $T/[]$E, cmp: Generic_Cmp, user_data: rawptr) { + when size_of(E) != 0 { + if n := len(data); n > 1 { + raw := ([^]byte)(raw_data(data)) + _smoothsort(raw, uint(len(data)), size_of(E), cmp, user_data) + } + } +} + + // stable_sort sorts a slice stable_sort :: proc(data: $T/[]$E) where ORD(E) { when size_of(E) != 0 { diff --git a/core/slice/sort_private.odin b/core/slice/sort_private.odin index 36637c4cd..efa9c596b 100644 --- a/core/slice/sort_private.odin +++ b/core/slice/sort_private.odin @@ -1,6 +1,7 @@ #+private package slice +import "base:builtin" import "base:intrinsics" _ :: intrinsics @@ -12,171 +13,6 @@ Sort_Kind :: enum { Cmp, } -_quick_sort_general :: proc(data: $T/[]$E, a, b, max_depth: int, call: $P, $KIND: Sort_Kind) where (ORD(E) && KIND == .Ordered) || (KIND != .Ordered) #no_bounds_check { - less :: #force_inline proc(a, b: E, call: P) -> bool { - when KIND == .Ordered { - return a < b - } else when KIND == .Less { - return call(a, b) - } else when KIND == .Cmp { - return call(a, b) == .Less - } else { - #panic("unhandled Sort_Kind") - } - } - - insertion_sort :: proc(data: $T/[]$E, a, b: int, call: P) #no_bounds_check { - for i in a+1.. a && less(data[j], data[j-1], call); j -= 1 { - swap(data, j, j-1) - } - } - } - - heap_sort :: proc(data: $T/[]$E, a, b: int, call: P) #no_bounds_check { - sift_down :: proc(data: T, lo, hi, first: int, call: P) #no_bounds_check { - root := lo - for { - child := 2*root + 1 - if child >= hi { - break - } - if child+1 < hi && less(data[first+child], data[first+child+1], call) { - child += 1 - } - if !less(data[first+root], data[first+child], call) { - return - } - swap(data, first+root, first+child) - root = child - } - } - - - first, lo, hi := a, 0, b-a - - for i := (hi-1)/2; i >= 0; i -= 1 { - sift_down(data, i, hi, first, call) - } - - for i := hi-1; i >= 0; i -= 1 { - swap(data, first, first+i) - sift_down(data, lo, i, first, call) - } - } - - median3 :: proc(data: T, m1, m0, m2: int, call: P) #no_bounds_check { - if less(data[m1], data[m0], call) { - swap(data, m1, m0) - } - if less(data[m2], data[m1], call) { - swap(data, m2, m1) - if less(data[m1], data[m0], call) { - swap(data, m1, m0) - } - } - } - - do_pivot :: proc(data: T, lo, hi: int, call: P) -> (midlo, midhi: int) #no_bounds_check { - m := int(uint(lo+hi)>>1) - if hi-lo > 40 { - s := (hi-lo)/8 - median3(data, lo, lo+s, lo+s*2, call) - median3(data, m, m-s, m+s, call) - median3(data, hi-1, hi-1-s, hi-1-s*2, call) - } - median3(data, lo, m, hi-1, call) - - pivot := lo - a, c := lo+1, hi-1 - - - for ; a < c && less(data[a], data[pivot], call); a += 1 { - } - b := a - - for { - for ; b < c && !less(data[pivot], data[b], call); b += 1 { // data[b] <= pivot - } - for ; b < c && less(data[pivot], data[c-1], call); c -=1 { // data[c-1] > pivot - } - if b >= c { - break - } - - swap(data, b, c-1) - b += 1 - c -= 1 - } - - protect := hi-c < 5 - if !protect && hi-c < (hi-lo)/4 { - dups := 0 - if !less(data[pivot], data[hi-1], call) { - swap(data, c, hi-1) - c += 1 - dups += 1 - } - if !less(data[b-1], data[pivot], call) { - b -= 1 - dups += 1 - } - - if !less(data[m], data[pivot], call) { - swap(data, m, b-1) - b -= 1 - dups += 1 - } - protect = dups > 1 - } - if protect { - for { - for ; a < b && !less(data[b-1], data[pivot], call); b -= 1 { - } - for ; a < b && less(data[a], data[pivot], call); a += 1 { - } - if a >= b { - break - } - swap(data, a, b-1) - a += 1 - b -= 1 - } - } - swap(data, pivot, b-1) - return b-1, c - } - - - a, b, max_depth := a, b, max_depth - - for b-a > 12 { // only use shell sort for lengths <= 12 - if max_depth == 0 { - heap_sort(data, a, b, call) - return - } - max_depth -= 1 - mlo, mhi := do_pivot(data, a, b, call) - if mlo-a < b-mhi { - _quick_sort_general(data, a, mlo, max_depth, call, KIND) - a = mhi - } else { - _quick_sort_general(data, mhi, b, max_depth, call, KIND) - b = mlo - } - } - if b-a > 1 { - // Shell short with gap 6 - for i in a+6.. bool { when KIND == .Ordered { @@ -200,179 +36,181 @@ _stable_sort_general :: proc(data: $T/[]$E, call: $P, $KIND: Sort_Kind) where (O } } -_quick_sort_general_with_indices :: proc(data: $T/[]$E, indices: []int, a, b, max_depth: int, call: $P, $KIND: Sort_Kind) where (ORD(E) && KIND == .Ordered) || (KIND != .Ordered) #no_bounds_check { - less :: #force_inline proc(a, b: E, call: P) -> bool { - when KIND == .Ordered { - return a < b - } else when KIND == .Less { - return call(a, b) - } else when KIND == .Cmp { - return call(a, b) == .Less - } else { - #panic("unhandled Sort_Kind") +@(private) +_smoothsort :: proc(base: [^]byte, nel: uint, width: uint, cmp: Generic_Cmp, arg: rawptr) { + pntz :: proc "contextless" (p: [2]uint) -> int { + r := intrinsics.count_trailing_zeros(p[0] - 1) + if r != 0 { + return int(r) } + r = (8*size_of(uint) + intrinsics.count_trailing_zeros(p[1])) + if r != 8*size_of(uint) { + return int(r) + } + return 0 } - insertion_sort :: proc(data: $T/[]$E, indices: []int, a, b: int, call: P) #no_bounds_check { - for i in a+1.. a && less(data[j], data[j-1], call); j -= 1 { - swap(data, j, j-1) - swap(indices, j, j-1) - } + shl :: proc "contextless" (p: []uint, n: int) { + n := n + if n >= 8*size_of(uint) { + n -= 8*size_of(uint) + p[1] = p[0] + p[0] = 0 } + p[1] <<= uint(n) + p[0] |= p[0] >> uint(8*size_of(uint) - n) + p[0] <<= uint(n) + } + shr :: proc "contextless" (p: []uint, n: int) { + n := n + if n >= 8*size_of(uint) { + n -= 8*size_of(uint) + p[0] = p[1] + p[1] = 0 + } + p[0] >>= uint(n) + p[0] |= p[1] << uint(8*size_of(uint) - n) + p[1] >>= uint(n) } - heap_sort :: proc(data: $T/[]$E, indices: []int, a, b: int, call: P) #no_bounds_check { - sift_down :: proc(data: T, indices: []int, lo, hi, first: int, call: P) #no_bounds_check { - root := lo - for { - child := 2*root + 1 - if child >= hi { - break - } - if child+1 < hi && less(data[first+child], data[first+child+1], call) { - child += 1 - } - if !less(data[first+root], data[first+child], call) { - return - } - swap(data, first+root, first+child) - swap(indices, first+root, first+child) - root = child - } - } - - - first, lo, hi := a, 0, b-a - - for i := (hi-1)/2; i >= 0; i -= 1 { - sift_down(data, indices, i, hi, first, call) - } - - for i := hi-1; i >= 0; i -= 1 { - swap(data, first, first+i) - swap(indices, first, first+i) - sift_down(data, indices, lo, i, first, call) - } - } - - median3 :: proc(data: T, indices: []int, m1, m0, m2: int, call: P) #no_bounds_check { - if less(data[m1], data[m0], call) { - swap(data, m1, m0) - swap(indices, m1, m0) - } - if less(data[m2], data[m1], call) { - swap(data, m2, m1) - swap(indices, m2, m1) - if less(data[m1], data[m0], call) { - swap(data, m1, m0) - swap(indices, m1, m0) - } - } - } - - do_pivot :: proc(data: T, indices: []int, lo, hi: int, call: P) -> (midlo, midhi: int) #no_bounds_check { - m := int(uint(lo+hi)>>1) - if hi-lo > 40 { - s := (hi-lo)/8 - median3(data, indices, lo, lo+s, lo+s*2, call) - median3(data, indices, m, m-s, m+s, call) - median3(data, indices, hi-1, hi-1-s, hi-1-s*2, call) - } - median3(data, indices, lo, m, hi-1, call) - - pivot := lo - a, c := lo+1, hi-1 - - - for ; a < c && less(data[a], data[pivot], call); a += 1 { - } - b := a - - for { - for ; b < c && !less(data[pivot], data[b], call); b += 1 { // data[b] <= pivot - } - for ; b < c && less(data[pivot], data[c-1], call); c -=1 { // data[c-1] > pivot - } - if b >= c { - break - } - - swap(data, b, c-1) - swap(indices, b, c-1) - b += 1 - c -= 1 - } - - protect := hi-c < 5 - if !protect && hi-c < (hi-lo)/4 { - dups := 0 - if !less(data[pivot], data[hi-1], call) { - swap(data, c, hi-1) - swap(indices, c, hi-1) - c += 1 - dups += 1 - } - if !less(data[b-1], data[pivot], call) { - b -= 1 - dups += 1 - } - - if !less(data[m], data[pivot], call) { - swap(data, m, b-1) - swap(indices, m, b-1) - b -= 1 - dups += 1 - } - protect = dups > 1 - } - if protect { - for { - for ; a < b && !less(data[b-1], data[pivot], call); b -= 1 { - } - for ; a < b && less(data[a], data[pivot], call); a += 1 { - } - if a >= b { - break - } - swap(data, a, b-1) - swap(indices, a, b-1) - a += 1 - b -= 1 - } - } - swap(data, pivot, b-1) - swap(indices, pivot, b-1) - return b-1, c - } - - assert(len(data) == len(indices)) - - a, b, max_depth := a, b, max_depth - - for b-a > 12 { // only use shell sort for lengths <= 12 - if max_depth == 0 { - heap_sort(data, indices, a, b, call) + cycle :: proc "contextless" (width: uint, data: [][^]byte, n: int) { + if len(data) < 2 { return } - max_depth -= 1 - mlo, mhi := do_pivot(data, indices, a, b, call) - if mlo-a < b-mhi { - _quick_sort_general_with_indices(data, indices, a, mlo, max_depth, call, KIND) - a = mhi - } else { - _quick_sort_general_with_indices(data, indices, mhi, b, max_depth, call, KIND) - b = mlo + buf: [256]u8 = --- + data[n] = raw_data(buf[:]) + width := width + for width != 0 { + l := builtin.min(size_of(buf), int(width)) + copy(data[n][:l], data[0][:l]) + for i in 0.. 1 { - // Shell short with gap 6 - for i in a+6.. 1 { + rt := head[-width:] + lf := head[-width:][-lp[pshift - 2]:] + if cmp(buf[0], lf, arg) >= .Equal && cmp(buf[0], rt, arg) >= .Equal { + break + } + if cmp(lf, rt, arg) >= .Equal { + buf[i], head = lf, lf + pshift -= 1 + } else { + buf[i], head = rt, rt + pshift -= 2 + } + i += 1 + } + cycle(width, buf[:], i) + } + + trinkle :: proc(head: [^]byte, width: uint, cmp: Generic_Cmp, arg: rawptr, pp: []uint, pshift: int, trusty: bool, lp: []uint) { + head := head + + p := [2]uint{pp[0], pp[1]} + + buf: [14*size_of(uint)+1][^]byte = --- + buf[0] = head + + i := 1 + trail := 0 + pshift := pshift + trusty := trusty + for p[0] != 1 || p[1] != 0 { + stepson := head[-lp[pshift]:] + if cmp(stepson, buf[0], arg) <= .Equal { + break + } + if !trusty && pshift > 1 { + rt := head[-width:] + lf := head[-width:][-lp[pshift-2]:] + if cmp(rt, stepson, arg) >= .Equal || cmp(lf, stepson, arg) >= .Equal { + break + } + } + buf[i] = stepson + head = stepson + trail = pntz(p) + shr(p[:], trail) + pshift += trail + trusty = false + i += 1 + } + if trusty { + return + } + cycle(width, buf[:], i) + sift(head, width, cmp, arg, pshift, lp) + } + + size := nel * width + if size == 0 { + return + } + + lp: [12*size_of(uint)]uint = --- + lp[1] = width + lp[0] = lp[1] + for i := 2; true; i += 1 { + lp[i] = lp[i-2] + lp[i-1] + width + if lp[i] >= size { + break + } + } + + head := base + high := head[size - width:] + p := [2]uint{1, 0} + pshift := 1 + for head < high { + if (p[0] & 3) == 3 { + sift(head, width, cmp, arg, pshift, lp[:]) + shr(p[:], 2) + pshift += 2 + } else { + if lp[pshift - 1] >= uint(uintptr(high) - uintptr(head)) { + trinkle(head, width, cmp, arg, p[:], pshift, false, lp[:]) + } else { + sift(head, width, cmp, arg, pshift, lp[:]) + } + if pshift == 1 { + shl(p[:], 1) + pshift = 0 + } else { + shl(p[:], pshift - 1) + pshift = 1 } } - insertion_sort(data, indices, a, b, call) + p[0] |= 1 + head = head[width:] } -} + trinkle(head, width, cmp, arg, p[:], pshift, false, lp[:]) + for pshift != 1 || p[0] != 1 || p[1] != 0 { + if pshift <= 1 { + trail := pntz(p) + shr(p[:], trail) + pshift += trail + } else { + shl(p[:], 2) + pshift -= 2 + p[0] ~= 7 + shr(p[:], 1) + trinkle(head[-width:][-lp[pshift]:], width, cmp, arg, p[:], pshift + 1, true, lp[:]) + shl(p[:], 1) + p[0] |= 1 + trinkle(head[-width:], width, cmp, arg, p[:], pshift, true, lp[:]) + } + head = head[-width:] + } +} \ No newline at end of file