Heavily improve reading and writing to bit fields

This commit is contained in:
gingerBill
2026-06-15 16:53:15 +01:00
parent 9ec6f3e378
commit 6feb33a79b
2 changed files with 217 additions and 83 deletions

View File

@@ -957,6 +957,133 @@ gb_internal LLVMValueRef OdinLLVMBuildLoadAligned(lbProcedure *p, LLVMTypeRef ty
return result;
}
// gb_internal void OdinLLVMBuildUnalignedStore(lbProcedure *p, LLVMValueRef dst, LLVMValueRef src, Type *ptr_type) {
// Type *t = type_deref(ptr_type);
// if (is_type_simd_vector(t)) {
// LLVMValueRef store = LLVMBuildStore(p->builder, src, dst);
// LLVMSetAlignment(store, 1);
// } else {
// lb_mem_copy_non_overlapping(p, {dst, ptr_type}, {src, ptr_type}, lb_const_int(p->module, t_int, type_size_of(t)), false);
// }
// }
gb_internal LLVMValueRef OdinLLVMBuildUnalignedLoad(lbProcedure *p, LLVMValueRef src, Type *ptr_type) {
LLVMTypeRef type = lb_type(p->module, type_deref(ptr_type));
src = LLVMBuildPointerCast(p->builder, src, lb_type(p->module, ptr_type), "");
LLVMValueRef load = LLVMBuildLoad2(p->builder, type, src, "");
LLVMSetAlignment(load, 1);
return load;
}
gb_internal void lb_copy_bits(lbProcedure *p,
LLVMValueRef dst,
LLVMValueRef src,
u64 buf_bytes,
u64 dst_bit,
u64 src_bit,
u64 size_bits
) {
auto ptr_offset = [](lbProcedure *p, LLVMValueRef ptr, u64 offset) -> LLVMValueRef {
LLVMValueRef indices[1] = {LLVMConstInt(lb_type(p->module, t_u64), offset, false)};
ptr = LLVMBuildPointerCast(p->builder, ptr, lb_type(p->module, t_u8_ptr), "");
return LLVMBuildGEP2(p->builder, lb_type(p->module, t_u8), ptr, indices, 1, "");
};
Type *t_u32_ptr = alloc_type_pointer(t_u32);
Type *t_u64_ptr = alloc_type_pointer(t_u64);
LLVMTypeRef llvm_u32 = lb_type(p->module, t_u32);
LLVMTypeRef llvm_u64 = lb_type(p->module, t_u64);
LLVMTypeRef llvm_u32_ptr = lb_type(p->module, t_u32_ptr);
LLVMTypeRef llvm_u64_ptr = lb_type(p->module, t_u64_ptr);
dst = LLVMBuildPointerCast(p->builder, dst, lb_type(p->module, t_u8_ptr), "");
src = LLVMBuildPointerCast(p->builder, src, lb_type(p->module, t_u8_ptr), "");
u64 src_byte = src_bit>>3;
u64 dst_byte = dst_bit>>3;
u64 src_shift = src_bit&7;
u64 dst_shift = dst_bit&7;
u64 src_need_bytes = (src_shift + size_bits + 7)>>3;
LLVMValueRef a = LLVMConstInt(llvm_u64, 0, false);
LLVMValueRef b = LLVMConstInt(llvm_u64, 0, false);
if (src_need_bytes <= 4) {
// a = u64(intrinsics.unaligned_load((^u32)(&src[src_byte])))
a = OdinLLVMBuildUnalignedLoad(p, ptr_offset(p, src, src_byte), t_u32_ptr);
a = LLVMBuildZExt(p->builder, a, llvm_u64, "");
} else {
// a = intrinsics.unaligned_load((^u64)(&src[src_byte]))
a = OdinLLVMBuildUnalignedLoad(p, ptr_offset(p, src, src_byte), t_u64_ptr);
// b = intrinsics.unaligned_load((^u64)(&src[src_byte + 8]))
b = OdinLLVMBuildUnalignedLoad(p, ptr_offset(p, src, src_byte + 8), t_u64_ptr);
}
// bits := (a >> src_shift) | (b << (64 - src_shift))
LLVMValueRef bits = LLVMBuildOr(p->builder,
LLVMBuildLShr(p->builder, a, LLVMConstInt(llvm_u64, src_shift, false), ""),
LLVMBuildShl (p->builder, b, LLVMConstInt(llvm_u64, 64 - src_shift, false), ""),
""
);
u64 mask = ~cast(u64)0;
if (size_bits < 64) {
mask = ((cast(u64)1)<<size_bits)-1;
}
// bits &= mask
bits = LLVMBuildAnd(p->builder, bits, LLVMConstInt(llvm_u64, mask, false), "");
u64 dst_need_bytes = (dst_shift + size_bits + 7) >> 3;
if (dst_shift == 0) {
if (dst_need_bytes <= 4) {
// v := u64(intrinsics.unaligned_load((^u32)(&dst[dst_byte])))
LLVMValueRef v = OdinLLVMBuildUnalignedLoad(p, ptr_offset(p, dst, dst_byte), t_u32_ptr);
v = LLVMBuildZExt(p->builder, v, llvm_u64, "");
// v = (v & ~mask) | bits
v = LLVMBuildAnd(p->builder, v, LLVMConstInt(llvm_u64, ~mask, false), "");
v = LLVMBuildOr(p->builder, v, bits, "");
// intrinsics.unaligned_store((^u32)(&dst[dst_byte]), u32(v))
v = LLVMBuildTrunc(p->builder, v, llvm_u32, "");
LLVMValueRef store = LLVMBuildStore(p->builder, v, LLVMBuildPointerCast(p->builder, ptr_offset(p, dst, dst_byte), llvm_u32_ptr, ""));
LLVMSetAlignment(store, 1);
} else {
// v := intrinsics.unaligned_load((^u64)(&dst[dst_byte]))
LLVMValueRef v = OdinLLVMBuildUnalignedLoad(p, ptr_offset(p, dst, dst_byte), t_u64_ptr);
// v = (v & ~mask) | bits
v = LLVMBuildAnd(p->builder, v, LLVMConstInt(llvm_u64, ~mask, false), "");
v = LLVMBuildOr(p->builder, v, bits, "");
// intrinsics.unaligned_store((^u64)(&dst[dst_byte]), v)
LLVMValueRef store = LLVMBuildStore(p->builder, v, LLVMBuildPointerCast(p->builder, ptr_offset(p, dst, dst_byte), llvm_u64_ptr, ""));
LLVMSetAlignment(store, 1);
}
} else {
// v0 := intrinsics.unaligned_load((^u64)(&dst[dst_byte]))
// v1 := intrinsics.unaligned_load((^u64)(&dst[dst_byte + 8]))
LLVMValueRef v0 = OdinLLVMBuildUnalignedLoad(p, ptr_offset(p, dst, dst_byte+0), t_u64_ptr);
LLVMValueRef v1 = OdinLLVMBuildUnalignedLoad(p, ptr_offset(p, dst, dst_byte+8), t_u64_ptr);
// v0 = (v0 & ~(mask << dst_shift)) | (bits << dst_shift)
v0 = LLVMBuildAnd(p->builder, v0, LLVMConstInt(llvm_u64, ~(mask << dst_shift), false), "");
v0 = LLVMBuildOr (p->builder, v0, LLVMBuildShl(p->builder, bits, LLVMConstInt(llvm_u64, dst_shift, false), ""), "");
// v1 = (v1 & ~(mask >> (64 - dst_shift))) | (bits >> (64 - dst_shift))
v1 = LLVMBuildAnd(p->builder, v1, LLVMConstInt(llvm_u64, ~(mask >> (64 - dst_shift)), false), "");
v1 = LLVMBuildOr (p->builder, v1, LLVMBuildLShr(p->builder, bits, LLVMConstInt(llvm_u64, (64 - dst_shift), false), ""), "");
// intrinsics.unaligned_store((^u64)(&dst[dst_byte]), v0)
// intrinsics.unaligned_store((^u64)(&dst[dst_byte + 8]), v1)
LLVMValueRef s0 = LLVMBuildStore(p->builder, v0, LLVMBuildPointerCast(p->builder, ptr_offset(p, dst, dst_byte+0), llvm_u64_ptr, ""));
LLVMValueRef s1 = LLVMBuildStore(p->builder, v1, LLVMBuildPointerCast(p->builder, ptr_offset(p, dst, dst_byte+8), llvm_u64_ptr, ""));
LLVMSetAlignment(s0, 1);
LLVMSetAlignment(s1, 1);
}
}
gb_internal void lb_addr_store(lbProcedure *p, lbAddr addr, lbValue value) {
if (addr.addr.value == nullptr) {
return;
@@ -974,6 +1101,7 @@ gb_internal void lb_addr_store(lbProcedure *p, lbAddr addr, lbValue value) {
if (addr.kind == lbAddr_BitField) {
lbValue dst = addr.addr;
lbValue src = {};
if (is_type_endian_big(addr.bitfield.type)) {
i64 shift_amount = 8*type_size_of(value.type) - addr.bitfield.bit_size;
lbValue shifted_value = value;
@@ -981,33 +1109,17 @@ gb_internal void lb_addr_store(lbProcedure *p, lbAddr addr, lbValue value) {
shifted_value.value,
LLVMConstInt(LLVMTypeOf(shifted_value.value), shift_amount, false), "");
lbValue src = lb_address_from_load_or_generate_local(p, shifted_value);
auto args = array_make<lbValue>(temporary_allocator(), 4);
args[0] = dst;
args[1] = src;
args[2] = lb_const_int(p->module, t_uintptr, addr.bitfield.bit_offset);
args[3] = lb_const_int(p->module, t_uintptr, addr.bitfield.bit_size);
lb_emit_runtime_call(p, "__write_bits", args);
} else if ((addr.bitfield.bit_offset % 8) == 0 &&
(addr.bitfield.bit_size % 8) == 0) {
lbValue src = lb_address_from_load_or_generate_local(p, value);
lbValue byte_offset = lb_const_int(p->module, t_uintptr, addr.bitfield.bit_offset/8);
lbValue byte_size = lb_const_int(p->module, t_uintptr, addr.bitfield.bit_size/8);
lbValue dst_offset = lb_emit_conv(p, dst, t_u8_ptr);
dst_offset = lb_emit_ptr_offset(p, dst_offset, byte_offset);
lb_mem_copy_non_overlapping(p, dst_offset, src, byte_size);
src = lb_address_from_load_or_generate_local(p, shifted_value);
} else {
lbValue src = lb_address_from_load_or_generate_local(p, value);
auto args = array_make<lbValue>(temporary_allocator(), 4);
args[0] = dst;
args[1] = src;
args[2] = lb_const_int(p->module, t_uintptr, addr.bitfield.bit_offset);
args[3] = lb_const_int(p->module, t_uintptr, addr.bitfield.bit_size);
lb_emit_runtime_call(p, "__write_bits", args);
src = lb_address_from_load_or_generate_local(p, value);
}
u64 buf_bytes = cast(u64)type_size_of(type_deref(dst.type));
u64 dst_bit = cast(u64)addr.bitfield.bit_offset;
u64 src_bit = cast(u64)0;
u64 size_bits = cast(u64)addr.bitfield.bit_size;
lb_copy_bits(p, dst.value, src.value, buf_bytes, dst_bit, src_bit, size_bits);
return;
} else if (addr.kind == lbAddr_Map) {
lb_internal_dynamic_map_set(p, addr.addr, addr.map.type, addr.map.key, value, p->curr_stmt);
@@ -1289,53 +1401,28 @@ gb_internal lbValue lb_addr_load(lbProcedure *p, lbAddr const &addr) {
}
}
i64 total_bitfield_bit_size = 8*type_size_of(lb_addr_type(addr));
i64 dst_byte_size = type_size_of(addr.bitfield.type);
lbAddr dst = lb_add_local_generated(p, addr.bitfield.type, true);
lbValue src = addr.addr;
lbValue bit_offset = lb_const_int(p->module, t_uintptr, addr.bitfield.bit_offset);
lbValue bit_size = lb_const_int(p->module, t_uintptr, addr.bitfield.bit_size);
lbValue byte_offset = lb_const_int(p->module, t_uintptr, (addr.bitfield.bit_offset+7)/8);
lbValue byte_size = lb_const_int(p->module, t_uintptr, (addr.bitfield.bit_size+7)/8);
GB_ASSERT(type_size_of(addr.bitfield.type) >= ((addr.bitfield.bit_size+7)/8));
lbValue r = {};
if (is_type_endian_big(addr.bitfield.type)) {
auto args = array_make<lbValue>(temporary_allocator(), 4);
args[0] = dst.addr;
args[1] = src;
args[2] = bit_offset;
args[3] = bit_size;
lb_emit_runtime_call(p, "__read_bits", args);
u64 buf_bytes = cast(u64)type_size_of(type_deref(src.type));
u64 dst_bit = cast(u64)0;
u64 src_bit = cast(u64)addr.bitfield.bit_offset;
u64 size_bits = cast(u64)addr.bitfield.bit_size;
lb_copy_bits(p, dst.addr.value, src.value, buf_bytes, dst_bit, src_bit, size_bits);
r = lb_addr_load(p, dst);
if (is_type_endian_big(addr.bitfield.type)) {
LLVMValueRef shift_amount = LLVMConstInt(
lb_type(p->module, lb_addr_type(dst)),
8*dst_byte_size - addr.bitfield.bit_size,
false
);
r = lb_addr_load(p, dst);
r.value = LLVMBuildShl(p->builder, r.value, shift_amount, "");
} else if ((addr.bitfield.bit_offset % 8) == 0) {
do_mask = 8*dst_byte_size != addr.bitfield.bit_size;
lbValue copy_size = byte_size;
lbValue src_offset = lb_emit_conv(p, src, t_u8_ptr);
src_offset = lb_emit_ptr_offset(p, src_offset, byte_offset);
if (addr.bitfield.bit_offset + 8*dst_byte_size <= total_bitfield_bit_size) {
copy_size = lb_const_int(p->module, t_uintptr, dst_byte_size);
}
lb_mem_copy_non_overlapping(p, dst.addr, src_offset, copy_size, false);
r = lb_addr_load(p, dst);
} else {
auto args = array_make<lbValue>(temporary_allocator(), 4);
args[0] = dst.addr;
args[1] = src;
args[2] = bit_offset;
args[3] = bit_size;
lb_emit_runtime_call(p, "__read_bits", args);
r = lb_addr_load(p, dst);
}
Type *t = addr.bitfield.type;