mirror of
https://github.com/odin-lang/Odin.git
synced 2026-04-21 22:05:20 +00:00
Merge branch 'master' of https://github.com/odin-lang/Odin
This commit is contained in:
@@ -928,6 +928,7 @@ non_zero_append :: proc{
|
||||
non_zero_append_elem_string,
|
||||
|
||||
append_fixed_capacity_elem,
|
||||
append_fixed_capacity_elems,
|
||||
non_zero_append_elem_fixed_capacity_string,
|
||||
|
||||
non_zero_append_soa_elem,
|
||||
|
||||
@@ -44,7 +44,8 @@ Context_256 :: struct {
|
||||
length: u64,
|
||||
md_bits: int,
|
||||
|
||||
is_initialized: bool,
|
||||
is_hw_accelerated: bool,
|
||||
is_initialized: bool,
|
||||
}
|
||||
|
||||
// Context_512 is a SHA-384, SHA-512 or SHA-512/256 instance.
|
||||
@@ -55,7 +56,8 @@ Context_512 :: struct {
|
||||
length: u64,
|
||||
md_bits: int,
|
||||
|
||||
is_initialized: bool,
|
||||
is_hw_accelerated: bool,
|
||||
is_initialized: bool,
|
||||
}
|
||||
|
||||
// init_224 initializes a Context_256 for SHA-224.
|
||||
@@ -88,6 +90,9 @@ init_512_256 :: proc(ctx: ^Context_512) {
|
||||
_init(ctx)
|
||||
}
|
||||
|
||||
// ERR_HW_NOT_SUPPORTED is the panic message raised by the `*_transf_hw`
// placeholder procedures when they are invoked even though no hardware
// implementation exists.
@(private)
|
||||
ERR_HW_NOT_SUPPORTED :: "crypto/sha2: hardware implementation unsupported"
|
||||
|
||||
@(private)
|
||||
_init :: proc(ctx: ^$T) {
|
||||
when T == Context_256 {
|
||||
@@ -113,6 +118,8 @@ _init :: proc(ctx: ^$T) {
|
||||
case:
|
||||
panic("crypto/sha2: invalid digest output length")
|
||||
}
|
||||
|
||||
ctx.is_hw_accelerated = is_hardware_accelerated_256()
|
||||
} else when T == Context_512 {
|
||||
switch ctx.md_bits {
|
||||
case 256:
|
||||
@@ -148,6 +155,8 @@ _init :: proc(ctx: ^$T) {
|
||||
case:
|
||||
panic("crypto/sha2: invalid digest output length")
|
||||
}
|
||||
|
||||
ctx.is_hw_accelerated = is_hardware_accelerated_512()
|
||||
}
|
||||
|
||||
ctx.length = 0
|
||||
@@ -399,7 +408,7 @@ SHA512_F4 :: #force_inline proc "contextless" (x: u64) -> u64 {
|
||||
@(private)
|
||||
sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) #no_bounds_check {
|
||||
when T == Context_256 {
|
||||
if is_hardware_accelerated_256() {
|
||||
if ctx.is_hw_accelerated {
|
||||
sha256_transf_hw(ctx, data)
|
||||
return
|
||||
}
|
||||
@@ -410,6 +419,11 @@ sha2_transf :: proc "contextless" (ctx: ^$T, data: []byte) #no_bounds_check {
|
||||
|
||||
CURR_BLOCK_SIZE :: BLOCK_SIZE_256
|
||||
} else when T == Context_512 {
|
||||
if ctx.is_hw_accelerated {
|
||||
sha512_transf_hw(ctx, data)
|
||||
return
|
||||
}
|
||||
|
||||
w: [SHA512_ROUNDS]u64
|
||||
wv: [8]u64
|
||||
t1, t2: u64
|
||||
|
||||
@@ -3,15 +3,13 @@
|
||||
#+build !arm32
|
||||
package sha2
|
||||
|
||||
@(private = "file")
|
||||
ERR_HW_NOT_SUPPORTED :: "crypto/sha2: hardware implementation unsupported"
|
||||
|
||||
// is_hardware_accelerated_256 returns true if and only if (⟺) hardware accelerated
|
||||
// SHA-224/SHA-256 is supported.
|
||||
// is_hardware_accelerated_256 returns true if and only if (⟺) hardware
|
||||
// accelerated SHA-224/SHA-256 is supported.
//
// This variant has no hardware path and therefore always reports false.
|
||||
is_hardware_accelerated_256 :: proc "contextless" () -> bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// sha256_transf_hw is the placeholder for the hardware SHA-224/SHA-256 block
// transform. It never reads `ctx` or `data`; it unconditionally panics with
// ERR_HW_NOT_SUPPORTED.
// NOTE(review): sha2_transf appears to call this only when
// ctx.is_hw_accelerated is set, which should never be the case when
// is_hardware_accelerated_256 returns false - confirm against the callers.
@(private)
|
||||
sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) {
|
||||
panic_contextless(ERR_HW_NOT_SUPPORTED)
|
||||
}
|
||||
@@ -70,8 +70,7 @@ sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) #no_bou
|
||||
tmp = x86._mm_shuffle_epi32(tmp, 0xb1) // CDAB
|
||||
state_1 = x86._mm_shuffle_epi32(state_1, 0x1b) // EFGH
|
||||
state_0 := x86._mm_alignr_epi8(tmp, state_1, 8) // ABEF
|
||||
// state_1 = x86._mm_blend_epi16(state_1, tmp, 0xf0) // CDGH
|
||||
state_1 = kludge_mm_blend_epi16_0xf0(state_1, tmp)
|
||||
state_1 = x86._mm_blend_epi16(state_1, tmp, 0xf0) // CDGH
|
||||
|
||||
data := data
|
||||
for len(data) >= BLOCK_SIZE_256 {
|
||||
@@ -238,18 +237,9 @@ sha256_transf_hw :: proc "contextless" (ctx: ^Context_256, data: []byte) #no_bou
|
||||
// Write back the updated state
|
||||
tmp = x86._mm_shuffle_epi32(state_0, 0x1b) // FEBA
|
||||
state_1 = x86._mm_shuffle_epi32(state_1, 0xb1) // DCHG
|
||||
// state_0 = x86._mm_blend_epi16(tmp, state_1, 0xf0) // DCBA
|
||||
state_0 = kludge_mm_blend_epi16_0xf0(tmp, state_1)
|
||||
state_0 = x86._mm_blend_epi16(tmp, state_1, 0xf0) // DCBA
|
||||
state_1 = x86._mm_alignr_epi8(state_1, tmp, 8) // ABEF
|
||||
|
||||
intrinsics.unaligned_store((^x86.__m128i)(&ctx.h[0]), state_0)
|
||||
intrinsics.unaligned_store((^x86.__m128i)(&ctx.h[4]), state_1)
|
||||
}
|
||||
|
||||
// kludge_mm_blend_epi16_0xf0 stands in for `x86._mm_blend_epi16(a, b, 0xf0)`:
// the result takes its first lane from `a` and its second lane from `b`.
// NOTE(review): presumably x86.__m128i is two 64-bit lanes, so this keeps the
// low half of `a` and the high half of `b` - confirm against the type's
// declaration.
@(private = "file")
|
||||
kludge_mm_blend_epi16_0xf0 :: #force_inline proc "contextless"(a, b: x86.__m128i) -> x86.__m128i {
|
||||
// HACK HACK HACK: LLVM got rid of `llvm.x86.sse41.pblendw`.
|
||||
// Unpack both vectors so that individual lanes can be picked by index.
a_ := simd.to_array(a)
|
||||
b_ := simd.to_array(b)
|
||||
// Lane 0 comes from `a`, lane 1 comes from `b`.
return x86.__m128i{a_[0], b_[1]}
|
||||
}
|
||||
|
||||
12
core/crypto/sha2/sha512_impl_hw_gen.odin
Normal file
12
core/crypto/sha2/sha512_impl_hw_gen.odin
Normal file
@@ -0,0 +1,12 @@
|
||||
package sha2
|
||||
|
||||
// is_hardware_accelerated_512 returns true if and only if (⟺) hardware
|
||||
// accelerated SHA-384, SHA-512, and SHA-512/256 are supported.
//
// This variant has no hardware path and therefore always reports false.
|
||||
is_hardware_accelerated_512 :: proc "contextless" () -> bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// sha512_transf_hw is the placeholder for the hardware SHA-384/SHA-512 block
// transform. It never reads `ctx` or `data`; it unconditionally panics with
// ERR_HW_NOT_SUPPORTED.
// NOTE(review): sha2_transf appears to call this only when
// ctx.is_hw_accelerated is set, which should never be the case when
// is_hardware_accelerated_512 returns false - confirm against the callers.
@(private)
|
||||
sha512_transf_hw :: proc "contextless" (ctx: ^Context_512, data: []byte) {
|
||||
panic_contextless(ERR_HW_NOT_SUPPORTED)
|
||||
}
|
||||
@@ -26,7 +26,18 @@ _mm_blendv_epi8 :: #force_inline proc "c" (a, b, mask: __m128i) -> __m128i {
|
||||
}
|
||||
// _mm_blend_epi16 blends the eight 16-bit lanes of `a` and `b` under the
// compile-time mask IMM8: for lane i, bit i of IMM8 set selects the lane from
// `b`, and bit i clear selects the lane from `a`.
// NOTE(review): this reading assumes simd.shuffle numbers the lanes of its
// first operand 0-7 and those of its second operand 8-15 - confirm.
// NOTE(review): two `return` statements are visible here; the `pblendw` one
// looks like the pre-change line of a diff and the `simd.shuffle` one its
// replacement - confirm which is actually present in the file.
@(require_results, enable_target_feature="sse4.1")
|
||||
_mm_blend_epi16 :: #force_inline proc "c" (a, b: __m128i, $IMM8: u8) -> __m128i {
|
||||
return transmute(__m128i)pblendw(transmute(i16x8)a, transmute(i16x8)b, IMM8)
|
||||
return transmute(__m128i)simd.shuffle(
|
||||
// `b` is the first shuffle operand, `a` the second.
transmute(i16x8)b,
|
||||
transmute(i16x8)a,
|
||||
// One index per output lane: i when mask bit i is set, otherwise i + 8.
0 when (IMM8 >> 0) & 1 == 1 else 8,
|
||||
1 when (IMM8 >> 1) & 1 == 1 else 9,
|
||||
2 when (IMM8 >> 2) & 1 == 1 else 10,
|
||||
3 when (IMM8 >> 3) & 1 == 1 else 11,
|
||||
4 when (IMM8 >> 4) & 1 == 1 else 12,
|
||||
5 when (IMM8 >> 5) & 1 == 1 else 13,
|
||||
6 when (IMM8 >> 6) & 1 == 1 else 14,
|
||||
7 when (IMM8 >> 7) & 1 == 1 else 15,
|
||||
)
|
||||
}
|
||||
@(require_results, enable_target_feature="sse4.1")
|
||||
_mm_blendv_pd :: #force_inline proc "c" (a, b, mask: __m128d) -> __m128d {
|
||||
@@ -303,8 +314,6 @@ foreign _ {
|
||||
blendpd :: proc(a, b: __m128d, #const imm2: u8) -> __m128d ---
|
||||
@(link_name = "llvm.x86.sse41.blendps")
|
||||
blendps :: proc(a, b: __m128, #const imm4: u8) -> __m128 ---
|
||||
@(link_name = "llvm.x86.sse41.pblendw")
|
||||
pblendw :: proc(a: i16x8, b: i16x8, #const imm8: u8) -> i16x8 ---
|
||||
@(link_name = "llvm.x86.sse41.insertps")
|
||||
insertps :: proc(a, b: __m128, #const imm8: u8) -> __m128 ---
|
||||
@(link_name = "llvm.x86.sse41.pmaxsb")
|
||||
|
||||
Reference in New Issue
Block a user