Optimize append_elem for different optimization levels

* For `-o:size` and below, the type-erased approach is used
* For `-o:speed` and above, the inlined form is used

This is necessary because a generic `mem_copy_non_overlapping` cannot be optimized when type erasure is used; in a hot path where `append_elem` is called a lot, `mem_copy_non_overlapping` therefore becomes a bottleneck.
This commit is contained in:
gingerBill
2026-06-19 11:19:16 +01:00
parent 7b58aa8eba
commit de0d2ae178

View File

@@ -715,10 +715,10 @@ _append_elem :: #force_no_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, a
if array.cap < array.len+1 {
// Same behavior as _append_elems but there's only one arg, so we always just add DEFAULT_DYNAMIC_ARRAY_CAPACITY.
cap := 2 * array.cap + DEFAULT_DYNAMIC_ARRAY_CAPACITY
cap := max(2 * array.cap, DEFAULT_DYNAMIC_ARRAY_CAPACITY)
// do not 'or_return' here as it could be a partial success
err = _reserve_dynamic_array(array, size_of_elem, align_of_elem, cap, should_zero, loc)
err = _reserve_dynamic_array_unsafe(array, size_of_elem, align_of_elem, cap, should_zero, loc)
}
if array.cap-array.len > 0 {
data := ([^]byte)(array.data)
@@ -735,11 +735,37 @@ _append_elem :: #force_no_inline proc(array: ^Raw_Dynamic_Array, size_of_elem, a
// `append_elem` appends the single element `arg` to the end of the dynamic array `array`.
//
// Three compile-time variants are selected with `when`:
//   - `size_of(E) == 0`: only `len` is incremented; nothing is stored.
//   - `ODIN_OPTIMIZATION_MODE <= .Size`: the work is delegated to the type-erased `_append_elem`.
//   - otherwise: the growth check and the element store are inlined, typed on `E`.
//
// Returns:
//   - num_appended: 1 when the element was stored, otherwise 0
//   - err: the error reported by the reservation call in the path that was taken, if any
@builtin
append_elem :: proc(array: ^$T/[dynamic]$E, #no_broadcast arg: E, loc := #caller_location) -> (num_appended: int, err: Allocator_Error) #optional_allocator_error {
	when size_of(E) == 0 {
		// A nil array is left untouched and the zero results (0, nil) are returned.
		if array == nil {
			return
		}
		// Zero-sized elements have nothing to copy, so only the length changes.
		(^Raw_Dynamic_Array)(array).len += 1
		return 1, nil
	} else when ODIN_OPTIMIZATION_MODE <= .Size {
		// A local copy is required so that the element's address can be handed to the type-erased helper.
		arg := arg
		return _append_elem((^Raw_Dynamic_Array)(array), size_of(E), align_of(E), &arg, true, loc=loc)
	} else {
		if array == nil {
			return
		}
		// NOTE(review): this copy is taken before any reallocation below; presumably it guards against
		// `arg` aliasing the array's own storage - confirm before removing it.
		arg := arg
		arr := (^Raw_Dynamic_Array)(array)
		if arr.cap < arr.len+1 {
			// Only one element is being added, so the new capacity is the larger of
			// double the current capacity and DEFAULT_DYNAMIC_ARRAY_CAPACITY.
			cap := max(2 * arr.cap, DEFAULT_DYNAMIC_ARRAY_CAPACITY)

			// do not 'or_return' here as it could be a partial success
			err = _reserve_dynamic_array_unsafe(arr, size_of(E), align_of(E), cap, true, loc)
		}
		if arr.cap-arr.len > 0 {
			// NOTE(bill, 2026-06-19): When this is in the hot path with -o:speed or -o:aggressive enabled,
			// this code path cannot rely on type erasure and `mem_copy_non_overlapping`.
			// So directly inlining the call and storing the argument like this helps the optimizer a lot
			assert(arr.data != nil, loc=loc)
			([^]E)(arr.data)[arr.len] = arg
			arr.len += 1
			num_appended = 1
		}
		return
	}
}
@@ -1311,6 +1337,35 @@ _reserve_dynamic_array :: #force_no_inline proc(a: ^Raw_Dynamic_Array, size_of_e
return nil
}
// `_reserve_dynamic_array_unsafe` grows the backing storage of the raw dynamic array `a`
// so that its capacity becomes `capacity` elements.
//
// `a` is dereferenced without being checked for nil.
//
// Inputs:
//   - a: the raw dynamic array whose storage is resized
//   - size_of_elem, align_of_elem: size and alignment of one element, in bytes
//   - capacity: the requested capacity, in elements
//   - should_zero: selects `mem_resize` (true) or `non_zero_mem_resize` (false)
//   - loc: caller location forwarded to the resize call
//
// Returns `nil` on success or when `capacity <= a.cap`, the error reported by the resize call,
// or `.Out_Of_Memory` when the resize yields no data for a non-zero byte size.
_reserve_dynamic_array_unsafe :: #force_no_inline proc(a: ^Raw_Dynamic_Array, size_of_elem, align_of_elem: int, capacity: int, should_zero: bool, loc := #caller_location) -> Allocator_Error {
	// The existing capacity already covers the request.
	if a.cap >= capacity {
		return nil
	}

	// An array without an allocator adopts the one from the current context.
	if a.allocator.procedure == nil {
		a.allocator = context.allocator
		assert(a.allocator.procedure != nil)
	}

	current_bytes := a.cap * size_of_elem
	wanted_bytes  := capacity * size_of_elem

	resized:    []byte
	resize_err: Allocator_Error
	if should_zero {
		resized, resize_err = mem_resize(a.data, current_bytes, wanted_bytes, align_of_elem, a.allocator, loc)
	} else {
		resized, resize_err = non_zero_mem_resize(a.data, current_bytes, wanted_bytes, align_of_elem, a.allocator, loc)
	}
	if resize_err != nil {
		// `a` is left unmodified when the resize reports an error.
		return resize_err
	}
	if wanted_bytes > 0 && resized == nil {
		return .Out_Of_Memory
	}

	a.data = raw_data(resized)
	a.cap  = capacity
	return nil
}
// `reserve_dynamic_array` will try to reserve memory of a passed dynamic array or map to the requested element count (setting the `cap`).
//
// When a memory resize allocation is required, the memory will be asked to be zeroed (i.e. it calls `mem_resize`).