This commit is contained in:
gingerBill
2026-03-16 12:21:56 +00:00
6 changed files with 47 additions and 23 deletions

View File

@@ -928,6 +928,7 @@ non_zero_append :: proc{
non_zero_append_elem_string,
append_fixed_capacity_elem,
append_fixed_capacity_elems,
non_zero_append_elem_fixed_capacity_string,
non_zero_append_soa_elem,

View File

@@ -44,7 +44,8 @@ Context_256 :: struct {
length: u64,
md_bits: int,
is_initialized: bool,
is_hw_accelerated: bool,
is_initialized: bool,
}
// Context_512 is a SHA-384, SHA-512 or SHA-512/256 instance.
@@ -55,7 +56,8 @@ Context_512 :: struct {
length: u64,
md_bits: int,
is_initialized: bool,
is_hw_accelerated: bool,
is_initialized: bool,
}
// init_224 initializes a Context_256 for SHA-224.
@@ -88,6 +90,9 @@ init_512_256 :: proc(ctx: ^Context_512) {
_init(ctx)
}
@(private)
ERR_HW_NOT_SUPPORTED :: "crypto/sha2: hardware implementation unsupported"
@(private)
_init :: proc(ctx: ^$T) {
when T == Context_256 {
@@ -113,6 +118,8 @@ _init :: proc(ctx: ^$T) {
case:
panic("crypto/sha2: invalid digest output length")
}
ctx.is_hw_accelerated = is_hardware_accelerated_256()
} else when T == Context_512 {
switch ctx.md_bits {
case 256:
@@ -148,6 +155,8 @@ _init :: proc(ctx: ^$T) {
case:
panic("crypto/sha2: invalid digest output length")
}
ctx.is_hw_accelerated = is_hardware_accelerated_512()
}
ctx.length = 0
@@ -399,7 +408,7 @@ SHA512_F4 :: #force_inline proc "contextless" (x: u64) -> u64 {
@(private)
sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) #no_bounds_check {
when T == Context_256 {
if is_hardware_accelerated_256() {
if ctx.is_hw_accelerated {
sha256_transf_hw(ctx, data)
return
}
@@ -410,6 +419,11 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) #no_bounds_check {
CURR_BLOCK_SIZE :: BLOCK_SIZE_256
} else when T == Context_512 {
if ctx.is_hw_accelerated {
sha512_transf_hw(ctx, data)
return
}
w: [SHA512_ROUNDS]u64
wv: [8]u64
t1, t2: u64

View File

@@ -3,15 +3,13 @@
#+build !arm32
package sha2
@(private = "file")
ERR_HW_NOT_SUPPORTED :: "crypto/sha2: hardware implementation unsupported"
// is_hardware_accelerated_256 returns true if and only if (⟺) hardware
// accelerated SHA-224/SHA-256 is supported.
//
// This variant of the procedure unconditionally returns false.
is_hardware_accelerated_256 :: proc "contextless" () -> bool {
return false
}
// sha256_transf_hw is the stub for the hardware accelerated SHA-224/SHA-256
// transform.  It ignores both `ctx` and `data` and always panics with
// ERR_HW_NOT_SUPPORTED; the accompanying is_hardware_accelerated_256 in this
// file returns false.
@(private)
sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) {
// No hardware implementation is provided here, so reaching this is an error.
panic_contextless(ERR_HW_NOT_SUPPORTED)
}

View File

@@ -70,8 +70,7 @@ sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) #no_bou
tmp = x86._mm_shuffle_epi32(tmp, 0xb1) // CDAB
state_1 = x86._mm_shuffle_epi32(state_1, 0x1b) // EFGH
state_0 := x86._mm_alignr_epi8(tmp, state_1, 8) // ABEF
// state_1 = x86._mm_blend_epi16(state_1, tmp, 0xf0) // CDGH
state_1 = kludge_mm_blend_epi16_0xf0(state_1, tmp)
state_1 = x86._mm_blend_epi16(state_1, tmp, 0xf0) // CDGH
data := data
for len(data) >= BLOCK_SIZE_256 {
@@ -238,18 +237,9 @@ sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) #no_bou
// Write back the updated state
tmp = x86._mm_shuffle_epi32(state_0, 0x1b) // FEBA
state_1 = x86._mm_shuffle_epi32(state_1, 0xb1) // DCHG
// state_0 = x86._mm_blend_epi16(tmp, state_1, 0xf0) // DCBA
state_0 = kludge_mm_blend_epi16_0xf0(tmp, state_1)
state_0 = x86._mm_blend_epi16(tmp, state_1, 0xf0) // DCBA
state_1 = x86._mm_alignr_epi8(state_1, tmp, 8) // ABEF
intrinsics.unaligned_store((^x86.__m128i)(&ctx.h[0]), state_0)
intrinsics.unaligned_store((^x86.__m128i)(&ctx.h[4]), state_1)
}
// kludge_mm_blend_epi16_0xf0 is a stand-in for
// `x86._mm_blend_epi16(a, b, 0xf0)`: the result is assembled from element 0
// of `a` and element 1 of `b`.
//
// NOTE(review): this presumes x86.__m128i consists of exactly two 64-bit
// lanes, so that element 1 covers the upper four 16-bit words selected by the
// 0xf0 mask - confirm against the x86.__m128i definition.
@(private = "file")
kludge_mm_blend_epi16_0xf0 :: #force_inline proc "contextless"(a, b: x86.__m128i) -> x86.__m128i {
// HACK HACK HACK: LLVM got rid of `llvm.x86.sse41.pblendw`.
a_ := simd.to_array(a)
b_ := simd.to_array(b)
// Low element comes from `a`, high element comes from `b`.
return x86.__m128i{a_[0], b_[1]}
}

View File

@@ -0,0 +1,12 @@
package sha2
// is_hardware_accelerated_512 returns true if and only if (⟺) hardware
// accelerated SHA-384, SHA-512 and SHA-512/256 are supported.
//
// This implementation unconditionally returns false.
is_hardware_accelerated_512 :: proc "contextless" () -> bool {
return false
}
// sha512_transf_hw is the stub for the hardware accelerated
// SHA-384/SHA-512 transform.  It ignores both `ctx` and `data` and always
// panics with ERR_HW_NOT_SUPPORTED; the accompanying
// is_hardware_accelerated_512 in this file returns false.
@(private)
sha512_transf_hw :: proc "contextless" (ctx: ^Context_512, data: []byte) {
// No hardware implementation is provided here, so reaching this is an error.
panic_contextless(ERR_HW_NOT_SUPPORTED)
}

View File

@@ -26,7 +26,18 @@ _mm_blendv_epi8 :: #force_inline proc "c" (a, b, mask: __m128i) -> __m128i {
}
// _mm_blend_epi16 blends the eight 16-bit lanes of `a` and `b` under the
// compile-time mask IMM8: lane i of the result is taken from `b` when bit i
// of IMM8 is set, and from `a` otherwise.
//
// The blend is expressed as a constant shuffle over the pair (b, a) rather
// than a call to the `llvm.x86.sse41.pblendw` intrinsic binding.  Shuffle
// indices 0..7 address the lanes of `b` (first operand) and indices 8..15
// address the lanes of `a` (second operand), so each lane picks either
// `i` (from `b`) or `i + 8` (from `a`).
@(require_results, enable_target_feature="sse4.1")
_mm_blend_epi16 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i {
	return transmute(__m128i)simd.shuffle(
		transmute(i16x8)b,
		transmute(i16x8)a,
		0 when (IMM8 >> 0) & 1 == 1 else 8,
		1 when (IMM8 >> 1) & 1 == 1 else 9,
		2 when (IMM8 >> 2) & 1 == 1 else 10,
		3 when (IMM8 >> 3) & 1 == 1 else 11,
		4 when (IMM8 >> 4) & 1 == 1 else 12,
		5 when (IMM8 >> 5) & 1 == 1 else 13,
		6 when (IMM8 >> 6) & 1 == 1 else 14,
		7 when (IMM8 >> 7) & 1 == 1 else 15,
	)
}
@(require_results, enable_target_feature="sse4.1")
_mm_blendv_pd :: #force_inline proc "c" (a, b, mask: __m128d) -> __m128d {
@@ -303,8 +314,6 @@ foreign _ {
blendpd :: proc(a, b: __m128d, #const imm2: u8) -> __m128d ---
@(link_name = "llvm.x86.sse41.blendps")
blendps :: proc(a, b: __m128, #const imm4: u8) -> __m128 ---
@(link_name = "llvm.x86.sse41.pblendw")
pblendw :: proc(a: i16x8, b: i16x8, #const imm8: u8) -> i16x8 ---
@(link_name = "llvm.x86.sse41.insertps")
insertps :: proc(a, b: __m128, #const imm8: u8) -> __m128 ---
@(link_name = "llvm.x86.sse41.pmaxsb")