diff --git a/base/intrinsics/intrinsics.odin b/base/intrinsics/intrinsics.odin index e78a41719..c78559f3f 100644 --- a/base/intrinsics/intrinsics.odin +++ b/base/intrinsics/intrinsics.odin @@ -286,6 +286,10 @@ simd_scatter :: proc(ptr: #simd[N]rawptr, val: #simd[N]T, mask: #simd[N]U) simd_masked_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) --- simd_masked_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) --- +simd_masked_expand_load :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) -> #simd[N]T where type_is_integer(U) || type_is_boolean(U) --- +simd_masked_compress_store :: proc(ptr: rawptr, val: #simd[N]T, mask: #simd[N]U) where type_is_integer(U) || type_is_boolean(U) --- + + simd_shuffle :: proc(a, b: #simd[N]T, indices: ..int) -> #simd[len(indices)]T --- simd_select :: proc(cond: #simd[N]boolean_or_integer, true, false: #simd[N]T) -> #simd[N]T --- diff --git a/core/simd/simd.odin b/core/simd/simd.odin index e93c94687..1f3c67b72 100644 --- a/core/simd/simd.odin +++ b/core/simd/simd.odin @@ -108,7 +108,8 @@ gather :: intrinsics.simd_gather scatter :: intrinsics.simd_scatter masked_load :: intrinsics.simd_masked_load masked_store :: intrinsics.simd_masked_store - +masked_expand_load :: intrinsics.simd_masked_expand_load +masked_compress_store :: intrinsics.simd_masked_compress_store // extract :: proc(a: #simd[N]T, idx: uint) -> T extract :: intrinsics.simd_extract diff --git a/src/check_builtin.cpp b/src/check_builtin.cpp index b5851bc01..bde102a8d 100644 --- a/src/check_builtin.cpp +++ b/src/check_builtin.cpp @@ -667,12 +667,16 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan case BuiltinProc_simd_scatter: case BuiltinProc_simd_masked_load: case BuiltinProc_simd_masked_store: + case BuiltinProc_simd_masked_expand_load: + case BuiltinProc_simd_masked_compress_store: { // gather (ptr: #simd[N]rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) -> #simd[N]T // scatter(ptr: #simd[N]rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) // masked_load (ptr: rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) -> #simd[N]T // masked_store(ptr: rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) + // masked_expand_load (ptr: rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) -> #simd[N]T + // masked_compress_store(ptr: rawptr, values: #simd[N]T, mask: #simd[N]int_or_bool) Operand ptr = {}; Operand values = {}; @@ -733,7 +737,8 @@ gb_internal bool check_builtin_simd_operation(CheckerContext *c, Operand *operan } if (id == BuiltinProc_simd_gather || - id == BuiltinProc_simd_masked_load) { + id == BuiltinProc_simd_masked_load || + id == BuiltinProc_simd_masked_expand_load) { operand->mode = Addressing_Value; operand->type = values.type; } else { diff --git a/src/checker_builtin_procs.hpp b/src/checker_builtin_procs.hpp index a5f688cd8..6245dadaf 100644 --- a/src/checker_builtin_procs.hpp +++ b/src/checker_builtin_procs.hpp @@ -195,6 +195,8 @@ BuiltinProc__simd_begin, BuiltinProc_simd_scatter, BuiltinProc_simd_masked_load, BuiltinProc_simd_masked_store, + BuiltinProc_simd_masked_expand_load, + BuiltinProc_simd_masked_compress_store, // Platform specific SIMD intrinsics BuiltinProc_simd_x86__MM_SHUFFLE, @@ -530,6 +532,8 @@ gb_global BuiltinProc builtin_procs[BuiltinProc_COUNT] = { {STR_LIT("simd_scatter"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_masked_load"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_masked_store"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_masked_expand_load"), 3, false, Expr_Expr, BuiltinProcPkg_intrinsics}, + {STR_LIT("simd_masked_compress_store"), 3, false, Expr_Stmt, BuiltinProcPkg_intrinsics}, {STR_LIT("simd_x86__MM_SHUFFLE"), 4, false, Expr_Expr, BuiltinProcPkg_intrinsics}, diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index ce1cc8586..ceaeb1aca 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1693,6 +1693,8 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn case BuiltinProc_simd_scatter: case BuiltinProc_simd_masked_load: case BuiltinProc_simd_masked_store: + case BuiltinProc_simd_masked_expand_load: + case BuiltinProc_simd_masked_compress_store: { LLVMValueRef ptr = arg0.value; LLVMValueRef val = arg1.value; @@ -1705,11 +1707,14 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn char const *name = nullptr; switch (builtin_id) { - case BuiltinProc_simd_gather: name = "llvm.masked.gather"; break; - case BuiltinProc_simd_scatter: name = "llvm.masked.scatter"; break; - case BuiltinProc_simd_masked_load: name = "llvm.masked.load"; break; - case BuiltinProc_simd_masked_store: name = "llvm.masked.store"; break; + case BuiltinProc_simd_gather: name = "llvm.masked.gather"; break; + case BuiltinProc_simd_scatter: name = "llvm.masked.scatter"; break; + case BuiltinProc_simd_masked_load: name = "llvm.masked.load"; break; + case BuiltinProc_simd_masked_store: name = "llvm.masked.store"; break; + case BuiltinProc_simd_masked_expand_load: name = "llvm.masked.expandload"; break; + case BuiltinProc_simd_masked_compress_store: name = "llvm.masked.compressstore"; break; } + unsigned type_count = 2; LLVMTypeRef types[2] = { lb_type(p->module, arg1.type), lb_type(p->module, arg0.type) @@ -1718,6 +1723,7 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn auto alignment = cast(unsigned long long)type_align_of(base_array_type(arg1.type)); LLVMValueRef align = LLVMConstInt(LLVMInt32TypeInContext(p->module->ctx), alignment, false); + unsigned arg_count = 4; LLVMValueRef args[4] = {}; switch (builtin_id) { case BuiltinProc_simd_masked_load: @@ -1739,9 +1745,25 @@ gb_internal lbValue lb_build_builtin_simd_proc(lbProcedure *p, Ast *expr, TypeAn args[2] = align; args[3] = mask; break; + + case BuiltinProc_simd_masked_expand_load: + arg_count = 3; + type_count = 1; + args[0] = ptr; + args[1] = mask; + args[2] = val; + break; + + case BuiltinProc_simd_masked_compress_store: + arg_count = 3; + type_count = 1; + args[0] = val; + args[1] = ptr; + args[2] = mask; + break; } - res.value = lb_call_intrinsic(p, name, args, gb_count_of(args), types, gb_count_of(types)); + res.value = lb_call_intrinsic(p, name, args, arg_count, types, type_count); return res; }