Improve the performance of simple array comparisons

This commit is contained in:
gingerBill
2019-07-27 11:59:50 +01:00
parent 912fc2890b
commit 77734ea967
4 changed files with 88 additions and 14 deletions

View File

@@ -243,6 +243,44 @@ print_type :: proc(fd: os.Handle, ti: ^Type_Info) {
}
}
// memory_compare compares the first `n` bytes stored at `a` and `b`.
//
// Returns 0 when all `n` bytes are equal (or when `n <= 0`); otherwise -1 or
// +1 according to whether the first differing byte of `a` is smaller or
// larger than the corresponding byte of `b` (bytes compared as unsigned).
//
// The bulk of the data is compared one `uintptr` word at a time; the bytes
// are only inspected individually once a differing word has been found, or
// for the trailing bytes that do not fill a whole word.
memory_compare :: proc "contextless" (a, b: rawptr, n: int) -> int #no_bounds_check {
	// A negative length would turn into an enormous unsigned count below.
	if n <= 0 {
		return 0;
	}

	x := uintptr(a);
	y := uintptr(b);
	length := uintptr(n);

	SU :: size_of(uintptr);

	// Number of whole words that lie entirely inside the `n` bytes. Loading
	// one extra word (n/SU + 1) would read up to SU bytes past the end of
	// both buffers, so the remainder is left to the byte loop at the bottom.
	fast := length/SU;
	tail := fast*SU;

	for curr_block := uintptr(0); curr_block < fast; curr_block += 1 {
		base := curr_block*SU;
		va := (^uintptr)(x + base)^;
		vb := (^uintptr)(y + base)^;
		if va ~ vb != 0 {
			// The words differ: locate the first differing byte by address
			// order so the result does not depend on endianness.
			for pos := base; pos < length; pos += 1 {
				ca := (^byte)(x+pos)^;
				cb := (^byte)(y+pos)^;
				if ca ~ cb != 0 {
					return (int(ca) - int(cb)) < 0 ? -1 : +1;
				}
			}
		}
	}

	// Trailing bytes that do not make up a full word.
	for pos := tail; pos < length; pos += 1 {
		ca := (^byte)(x+pos)^;
		cb := (^byte)(y+pos)^;
		if ca ~ cb != 0 {
			return (int(ca) - int(cb)) < 0 ? -1 : +1;
		}
	}

	return 0;
}
string_eq :: proc "contextless" (a, b: string) -> bool {
switch {
case len(a) != len(b): return false;
@@ -253,7 +291,7 @@ string_eq :: proc "contextless" (a, b: string) -> bool {
}
// string_cmp orders two strings by their bytes.
//
// Returns a negative value when `a` sorts before `b`, a positive value when
// it sorts after, and 0 only when both strings have the same length and the
// same bytes. The common prefix is compared with memory_compare; if it is
// identical, the shorter string sorts first.
string_cmp :: proc "contextless" (a, b: string) -> int {
	n := min(len(a), len(b));
	if n > 0 {
		// Element 0 is only addressed when both strings are non-empty.
		res := memory_compare(&a[0], &b[0], n);
		if res != 0 {
			return res;
		}
	}

	// Equal prefix: break the tie on length so that e.g. "ab" < "abc".
	switch {
	case len(a) < len(b): return -1;
	case len(a) > len(b): return +1;
	}
	return 0;
}
// string_ne reports whether `a` and `b` are not equal; it is the exact
// negation of string_eq.
string_ne :: inline proc "contextless" (a, b: string) -> bool {
	equal := string_eq(a, b);
	return !equal;
}

View File

@@ -1613,6 +1613,8 @@ void generate_minimum_dependency_set(Checker *c, Entity *start) {
str_lit("umodti3"),
str_lit("udivti3"),
str_lit("memory_compare"),
};
for (isize i = 0; i < gb_count_of(required_runtime_entities); i++) {
add_dependency_to_set(c, scope_lookup(c->info.runtime_package->scope, required_runtime_entities[i]));

View File

@@ -4126,20 +4126,31 @@ irValue *ir_emit_comp(irProcedure *proc, TokenKind op_kind, irValue *left, irVal
return ir_emit_load(proc, val);
} else {
irValue *val = ir_add_local_generated(proc, t_bool, false);
ir_emit_store(proc, val, res);
auto loop_data = ir_loop_start(proc, count, t_i32);
{
irValue *i = loop_data.idx;
irValue *x = ir_emit_load(proc, ir_emit_array_ep(proc, lhs, i));
irValue *y = ir_emit_load(proc, ir_emit_array_ep(proc, rhs, i));
irValue *cmp = ir_emit_comp(proc, op_kind, x, y);
irValue *new_res = ir_emit_arith(proc, cmp_op, ir_emit_load(proc, val), cmp, t_bool);
ir_emit_store(proc, val, ir_emit_conv(proc, new_res, t_bool));
}
ir_loop_end(proc, loop_data);
if (is_type_simple_compare(tl) && (op_kind == Token_CmpEq || op_kind == Token_NotEq)) {
// TODO(bill): Test to see if this is actually faster!!!!
auto args = array_make<irValue *>(heap_allocator(), 3);
args[0] = ir_emit_conv(proc, lhs, t_rawptr);
args[1] = ir_emit_conv(proc, rhs, t_rawptr);
args[2] = ir_const_int(type_size_of(tl));
irValue *val = ir_emit_runtime_call(proc, "memory_compare", args);
irValue *res = ir_emit_comp(proc, op_kind, val, v_zero);
return ir_emit_conv(proc, res, t_bool);
} else {
irValue *val = ir_add_local_generated(proc, t_bool, false);
ir_emit_store(proc, val, res);
auto loop_data = ir_loop_start(proc, count, t_i32);
{
irValue *i = loop_data.idx;
irValue *x = ir_emit_load(proc, ir_emit_array_ep(proc, lhs, i));
irValue *y = ir_emit_load(proc, ir_emit_array_ep(proc, rhs, i));
irValue *cmp = ir_emit_comp(proc, op_kind, x, y);
irValue *new_res = ir_emit_arith(proc, cmp_op, ir_emit_load(proc, val), cmp, t_bool);
ir_emit_store(proc, val, ir_emit_conv(proc, new_res, t_bool));
}
ir_loop_end(proc, loop_data);
return ir_emit_load(proc, val);
return ir_emit_load(proc, val);
}
}
}

View File

@@ -1037,6 +1037,29 @@ Type *core_array_type(Type *t) {
return t;
}
// NOTE(bill): type can be easily compared using memcmp
//
// Returns true when `==`/`!=` on values of type `t` gives the same answer as a
// bytewise comparison of their storage. Arrays qualify when their element
// type does. Anything not listed below is reported as not simply comparable.
//
// Floating-point and complex basic types are deliberately NOT accepted:
// bytewise equality disagrees with IEEE-754 `==` for +0.0 vs -0.0 (equal
// values, different bits) and for NaN (identical bits, never equal), so
// those types must go through an element-wise comparison instead.
bool is_type_simple_compare(Type *t) {
	t = core_type(t);
	switch (t->kind) {
	case Type_Array:
		// An array is simply comparable exactly when its elements are.
		return is_type_simple_compare(t->Array.elem);

	case Type_Basic:
		if (t->Basic.flags & (BasicFlag_Integer|BasicFlag_Rune|BasicFlag_Pointer)) {
			return true;
		}
		return false;

	case Type_Pointer:
	case Type_Proc:
	case Type_BitSet:
	case Type_BitField:
		return true;
	}

	return false;
}
Type *base_complex_elem_type(Type *t) {
t = core_type(t);
if (is_type_complex(t)) {