mirror of
https://github.com/odin-lang/Odin.git
synced 2026-06-19 16:42:33 +00:00
rexcode/arm64: NEON shift-by-immediate encode forms + encoder extension
First encoder-extension family. Adds Operand_Type VEC_SHIFT and Operand_Encoding NEON_SHL_IMM/NEON_SHR_IMM: the element-size marker bit sits in the entry's bits, the encoder packs the amount into the low immh:immb bits (left = shift; right = esize - shift, esize from the vector operand via vec_esize/form.ops[0]), and the decoder recovers esize from immh to compute the amount. Adds 13 mnemonics (91 forms) via specgen: left SHL/SLI/SQSHLU/SQSHL, right SSHR/USHR/SRSHR/URSHR/SSRA/USRA/SRSRA/URSRA/SRI. specgen derives bits/mask empirically by varying registers AND the shift (canon = operand bits zero; other extreme sets all shift bits), so per-arrangement immh discrimination + the growing shift-field width fall out automatically. Verified end-to-end: encode matches llvm-mc byte-for-byte AND decode recovers mnemonic + amount (sshr/shl/sli/ushr/srsra across B/H/S/D); arm64 check + 461 tests pass. First of the encoder-extension phase ([[rexcode-encode-coverage]]); CCMP_IMM imm5@20:16 pattern generalizes here.
This commit is contained in:
@@ -199,6 +199,18 @@ extract_operand_inline :: #force_inline proc "contextless" (
|
||||
case .BARRIER_FIELD:
|
||||
return Operand{immediate = i64((word >> 8) & 0xF), kind = .IMMEDIATE, size = 1}
|
||||
|
||||
// ---- NEON shift-by-immediate: recover the amount from immh:immb ---------
|
||||
case .NEON_SHL_IMM, .NEON_SHR_IMM:
|
||||
immh := (word >> 19) & 0xF
|
||||
esize: i64 = 8
|
||||
if immh >= 8 { esize = 64 }
|
||||
else if immh >= 4 { esize = 32 }
|
||||
else if immh >= 2 { esize = 16 }
|
||||
val := i64((word >> 16) & 0x7F)
|
||||
amt := val - esize
|
||||
if en == .NEON_SHR_IMM { amt = 2 * esize - val }
|
||||
return Operand{immediate = amt, kind = .IMMEDIATE, size = 1}
|
||||
|
||||
// ---- Memory operand variants ------------------------------------------
|
||||
case .OFFSET_BASE_U12:
|
||||
size := u32(1) << ((word >> 30) & 0x3)
|
||||
|
||||
@@ -247,7 +247,7 @@ operand_matches_inline :: #force_inline proc "contextless" (
|
||||
return op.kind == .IMMEDIATE
|
||||
|
||||
case .IMM_12, .IMM_16, .IMM_8, .IMM_6, .IMM_5, .IMM_4, .IMM_3, .IMM_2,
|
||||
.NZCV_IMM, .SYS_REG, .HW_SHIFT, .LSE_SIZE:
|
||||
.NZCV_IMM, .SYS_REG, .HW_SHIFT, .LSE_SIZE, .VEC_SHIFT:
|
||||
return op.kind == .IMMEDIATE
|
||||
case .BITMASK_IMM:
|
||||
// The user passes the raw logical mask value; we validate that it
|
||||
@@ -268,6 +268,17 @@ operand_matches_inline :: #force_inline proc "contextless" (
|
||||
// =============================================================================
|
||||
|
||||
// Maps a NEON vector arrangement operand type to its element width in bits.
// Byte arrangements yield 8, halfword (including the FP16 variants) 16,
// word 32 and doubleword 64. Any operand type not listed below falls back
// to 8.
@(private="file")
vec_esize :: #force_inline proc "contextless" (ot: Operand_Type) -> u32 {
	width: u32 = 8 // fallback for every type without an explicit arm
	#partial switch ot {
	case .V_1D, .V_2D:
		width = 64
	case .V_2S, .V_4S:
		width = 32
	case .V_4H, .V_8H, .V_4H_FP16, .V_8H_FP16:
		width = 16
	case .V_8B, .V_16B:
		width = 8
	}
	return width
}
|
||||
|
||||
pack_operand_inline :: #force_inline proc(
|
||||
op: ^Operand,
|
||||
enc: Operand_Encoding,
|
||||
@@ -422,6 +433,14 @@ pack_operand_inline :: #force_inline proc(
|
||||
case .VA:
|
||||
return (u32(reg_hw(op.reg)) & 0x1F) << 10
|
||||
|
||||
// NEON shift-by-immediate: the element-size marker is already in `bits`;
// the operand drives only the low immh:immb bits below that marker. The
// free field is esize-1 wide as a mask (3/4/5/6 bits for B/H/S/D), so the
// amount is masked with `esize - 1` rather than a fixed 0x3F. With the
// fixed mask an out-of-range amount would OR into the marker bits and
// silently encode a different arrangement. In-range amounts are unaffected.
case .NEON_SHL_IMM:
	// Left shifts store the amount directly in the low bits.
	esize := vec_esize(form.ops[0])
	return (u32(op.immediate) & (esize - 1)) << 16
case .NEON_SHR_IMM:
	// Right shifts store esize - amount; amount == esize yields all-zero
	// low bits, which is the valid maximum shift.
	esize := vec_esize(form.ops[0])
	return ((esize - u32(op.immediate)) & (esize - 1)) << 16
|
||||
|
||||
// NEON MOVI/FMOV immediate split: abc at bits 18-16, defgh at bits 9-5.
|
||||
case .NEON_IMM8_FMOV:
|
||||
v := u32(op.immediate) & 0xFF
|
||||
|
||||
@@ -165,6 +165,10 @@ Operand_Type :: enum u8 {
|
||||
|
||||
// ---- Condition code ----
|
||||
COND,
|
||||
|
||||
// ---- NEON shift-by-immediate amount (encoded into immh:immb together
|
||||
// with the element size: left = esize+shift, right = 2*esize-shift) ----
|
||||
VEC_SHIFT,
|
||||
}
|
||||
|
||||
// Where each operand's bits land in the 32-bit word.
|
||||
@@ -223,6 +227,12 @@ Operand_Encoding :: enum u8 {
|
||||
NEON_INDEX_S, // S lane index
|
||||
NEON_INDEX_D, // D lane index
|
||||
|
||||
// ---- NEON shift-by-immediate (immh:immb at bits 22:16) ----
|
||||
// The element-size marker bit is fixed in the entry's `bits`; the operand
|
||||
// drives only the low bits. Left: low = shift. Right: low = esize - shift.
|
||||
NEON_SHL_IMM,
|
||||
NEON_SHR_IMM,
|
||||
|
||||
// ---- LSE atomics ------------------------------------------------------
|
||||
ATOMIC_RS, // Rs (source / compare) at bits 16-20
|
||||
ATOMIC_RT, // Rt (target) at bits 0-4
|
||||
|
||||
@@ -1065,14 +1065,40 @@ inst_bif_r_r_r :: #force_inline proc "contextless" (dst: Regist
|
||||
emit_bif_r_r_r :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) { append(instructions, inst_bif_r_r_r(dst, src, src2)) }
|
||||
inst_bsl_r_r_r :: #force_inline proc "contextless" (dst: Register, src: Register, src2: Register) -> Instruction { return Instruction{mnemonic = .BSL, operand_count = 3, length = 4, ops = {op_v_16b(u8(reg_hw(dst))), op_v_16b(u8(reg_hw(src))), op_v_16b(u8(reg_hw(src2))), {}}} }
|
||||
emit_bsl_r_r_r :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) { append(instructions, inst_bsl_r_r_r(dst, src, src2)) }
|
||||
inst_shl_v_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .SHL_V, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_shl_v_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_shl_v_r_r_i(dst, src, imm)) }
|
||||
inst_sqshl_v_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .SQSHL_V, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_sqshl_v_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_sqshl_v_r_r_i(dst, src, imm)) }
|
||||
inst_sqshlu_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .SQSHLU, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_sqshlu_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_sqshlu_r_r_i(dst, src, imm)) }
|
||||
inst_srshl_r_r_r :: #force_inline proc "contextless" (dst: Register, src: Register, src2: Register) -> Instruction { return Instruction{mnemonic = .SRSHL, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_v_8b(u8(reg_hw(src2))), {}}} }
|
||||
emit_srshl_r_r_r :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) { append(instructions, inst_srshl_r_r_r(dst, src, src2)) }
|
||||
inst_urshl_r_r_r :: #force_inline proc "contextless" (dst: Register, src: Register, src2: Register) -> Instruction { return Instruction{mnemonic = .URSHL, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_v_8b(u8(reg_hw(src2))), {}}} }
|
||||
emit_urshl_r_r_r :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) { append(instructions, inst_urshl_r_r_r(dst, src, src2)) }
|
||||
inst_sshr_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .SSHR, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_sshr_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_sshr_r_r_i(dst, src, imm)) }
|
||||
inst_ushr_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .USHR, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_ushr_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_ushr_r_r_i(dst, src, imm)) }
|
||||
inst_ssra_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .SSRA, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_ssra_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_ssra_r_r_i(dst, src, imm)) }
|
||||
inst_usra_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .USRA, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_usra_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_usra_r_r_i(dst, src, imm)) }
|
||||
inst_srshr_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .SRSHR, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_srshr_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_srshr_r_r_i(dst, src, imm)) }
|
||||
inst_urshr_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .URSHR, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_urshr_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_urshr_r_r_i(dst, src, imm)) }
|
||||
inst_srsra_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .SRSRA, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_srsra_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_srsra_r_r_i(dst, src, imm)) }
|
||||
inst_ursra_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .URSRA, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_ursra_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_ursra_r_r_i(dst, src, imm)) }
|
||||
inst_sshl_r_r_r :: #force_inline proc "contextless" (dst: Register, src: Register, src2: Register) -> Instruction { return Instruction{mnemonic = .SSHL, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_v_8b(u8(reg_hw(src2))), {}}} }
|
||||
emit_sshl_r_r_r :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) { append(instructions, inst_sshl_r_r_r(dst, src, src2)) }
|
||||
inst_ushl_r_r_r :: #force_inline proc "contextless" (dst: Register, src: Register, src2: Register) -> Instruction { return Instruction{mnemonic = .USHL, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_v_8b(u8(reg_hw(src2))), {}}} }
|
||||
emit_ushl_r_r_r :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) { append(instructions, inst_ushl_r_r_r(dst, src, src2)) }
|
||||
inst_sli_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .SLI, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_sli_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_sli_r_r_i(dst, src, imm)) }
|
||||
inst_sri_r_r_i :: #force_inline proc "contextless" (dst: Register, src: Register, imm: i64) -> Instruction { return Instruction{mnemonic = .SRI, operand_count = 3, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), op_imm(imm, 4), {}}} }
|
||||
emit_sri_r_r_i :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, imm: i64) { append(instructions, inst_sri_r_r_i(dst, src, imm)) }
|
||||
inst_not_v_r_r :: #force_inline proc "contextless" (dst: Register, src: Register) -> Instruction { return Instruction{mnemonic = .NOT_V, operand_count = 2, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), {}, {}}} }
|
||||
emit_not_v_r_r :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register) { append(instructions, inst_not_v_r_r(dst, src)) }
|
||||
inst_rbit_v_r_r :: #force_inline proc "contextless" (dst: Register, src: Register) -> Instruction { return Instruction{mnemonic = .RBIT_V, operand_count = 2, length = 4, ops = {op_v_8b(u8(reg_hw(dst))), op_v_8b(u8(reg_hw(src))), {}, {}}} }
|
||||
@@ -2944,14 +2970,40 @@ inst_bif :: inst_bif_r_r_r
|
||||
emit_bif :: emit_bif_r_r_r
|
||||
inst_bsl :: inst_bsl_r_r_r
|
||||
emit_bsl :: emit_bsl_r_r_r
|
||||
inst_shl_v :: inst_shl_v_r_r_i
|
||||
emit_shl_v :: emit_shl_v_r_r_i
|
||||
inst_sqshl_v :: inst_sqshl_v_r_r_i
|
||||
emit_sqshl_v :: emit_sqshl_v_r_r_i
|
||||
inst_sqshlu :: inst_sqshlu_r_r_i
|
||||
emit_sqshlu :: emit_sqshlu_r_r_i
|
||||
inst_srshl :: inst_srshl_r_r_r
|
||||
emit_srshl :: emit_srshl_r_r_r
|
||||
inst_urshl :: inst_urshl_r_r_r
|
||||
emit_urshl :: emit_urshl_r_r_r
|
||||
inst_sshr :: inst_sshr_r_r_i
|
||||
emit_sshr :: emit_sshr_r_r_i
|
||||
inst_ushr :: inst_ushr_r_r_i
|
||||
emit_ushr :: emit_ushr_r_r_i
|
||||
inst_ssra :: inst_ssra_r_r_i
|
||||
emit_ssra :: emit_ssra_r_r_i
|
||||
inst_usra :: inst_usra_r_r_i
|
||||
emit_usra :: emit_usra_r_r_i
|
||||
inst_srshr :: inst_srshr_r_r_i
|
||||
emit_srshr :: emit_srshr_r_r_i
|
||||
inst_urshr :: inst_urshr_r_r_i
|
||||
emit_urshr :: emit_urshr_r_r_i
|
||||
inst_srsra :: inst_srsra_r_r_i
|
||||
emit_srsra :: emit_srsra_r_r_i
|
||||
inst_ursra :: inst_ursra_r_r_i
|
||||
emit_ursra :: emit_ursra_r_r_i
|
||||
inst_sshl :: inst_sshl_r_r_r
|
||||
emit_sshl :: emit_sshl_r_r_r
|
||||
inst_ushl :: inst_ushl_r_r_r
|
||||
emit_ushl :: emit_ushl_r_r_r
|
||||
inst_sli :: inst_sli_r_r_i
|
||||
emit_sli :: emit_sli_r_r_i
|
||||
inst_sri :: inst_sri_r_r_i
|
||||
emit_sri :: emit_sri_r_r_i
|
||||
inst_not_v :: inst_not_v_r_r
|
||||
emit_not_v :: emit_not_v_r_r
|
||||
inst_rbit_v :: inst_rbit_v_r_r
|
||||
|
||||
@@ -4009,5 +4009,124 @@ ENCODING_TABLE := #partial [Mnemonic][]Encoding{
|
||||
.FCVTXN2 = {
|
||||
{.FCVTXN2, {.V_4S, .V_2D, .NONE, .NONE}, {.VD, .VN, .NONE, .NONE}, 0x6E616800, 0xFFFFFC00, .NEON, {}},
|
||||
},
|
||||
|
||||
// Advanced SIMD shift by immediate.
|
||||
.SHL_V = {
|
||||
{.SHL_V, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x0F085400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SHL_V, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x4F085400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SHL_V, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x0F105400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SHL_V, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x4F105400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SHL_V, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x0F205400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SHL_V, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x4F205400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SHL_V, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x4F405400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.SLI = {
|
||||
{.SLI, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x2F085400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SLI, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x6F085400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SLI, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x2F105400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SLI, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x6F105400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SLI, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x2F205400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SLI, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x6F205400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SLI, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x6F405400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.SQSHLU = {
|
||||
{.SQSHLU, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x2F086400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SQSHLU, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x6F086400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SQSHLU, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x2F106400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SQSHLU, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x6F106400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SQSHLU, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x2F206400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SQSHLU, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x6F206400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SQSHLU, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x6F406400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.SQSHL_V = {
|
||||
{.SQSHL_V, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x0F087400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SQSHL_V, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x4F087400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SQSHL_V, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x0F107400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SQSHL_V, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x4F107400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SQSHL_V, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x0F207400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SQSHL_V, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x4F207400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SQSHL_V, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHL_IMM, .NONE}, 0x4F407400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.SSHR = {
|
||||
{.SSHR, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F080400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SSHR, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F080400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SSHR, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F100400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SSHR, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F100400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SSHR, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F200400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SSHR, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F200400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SSHR, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F400400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.USHR = {
|
||||
{.USHR, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F080400, 0xFFF8FC00, .NEON, {}},
|
||||
{.USHR, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F080400, 0xFFF8FC00, .NEON, {}},
|
||||
{.USHR, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F100400, 0xFFF0FC00, .NEON, {}},
|
||||
{.USHR, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F100400, 0xFFF0FC00, .NEON, {}},
|
||||
{.USHR, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F200400, 0xFFE0FC00, .NEON, {}},
|
||||
{.USHR, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F200400, 0xFFE0FC00, .NEON, {}},
|
||||
{.USHR, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F400400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.SRSHR = {
|
||||
{.SRSHR, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F082400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SRSHR, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F082400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SRSHR, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F102400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SRSHR, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F102400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SRSHR, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F202400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SRSHR, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F202400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SRSHR, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F402400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.URSHR = {
|
||||
{.URSHR, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F082400, 0xFFF8FC00, .NEON, {}},
|
||||
{.URSHR, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F082400, 0xFFF8FC00, .NEON, {}},
|
||||
{.URSHR, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F102400, 0xFFF0FC00, .NEON, {}},
|
||||
{.URSHR, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F102400, 0xFFF0FC00, .NEON, {}},
|
||||
{.URSHR, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F202400, 0xFFE0FC00, .NEON, {}},
|
||||
{.URSHR, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F202400, 0xFFE0FC00, .NEON, {}},
|
||||
{.URSHR, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F402400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.SSRA = {
|
||||
{.SSRA, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F081400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SSRA, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F081400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SSRA, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F101400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SSRA, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F101400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SSRA, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F201400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SSRA, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F201400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SSRA, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F401400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.USRA = {
|
||||
{.USRA, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F081400, 0xFFF8FC00, .NEON, {}},
|
||||
{.USRA, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F081400, 0xFFF8FC00, .NEON, {}},
|
||||
{.USRA, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F101400, 0xFFF0FC00, .NEON, {}},
|
||||
{.USRA, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F101400, 0xFFF0FC00, .NEON, {}},
|
||||
{.USRA, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F201400, 0xFFE0FC00, .NEON, {}},
|
||||
{.USRA, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F201400, 0xFFE0FC00, .NEON, {}},
|
||||
{.USRA, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F401400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.SRSRA = {
|
||||
{.SRSRA, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F083400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SRSRA, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F083400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SRSRA, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F103400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SRSRA, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F103400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SRSRA, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x0F203400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SRSRA, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F203400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SRSRA, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x4F403400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.URSRA = {
|
||||
{.URSRA, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F083400, 0xFFF8FC00, .NEON, {}},
|
||||
{.URSRA, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F083400, 0xFFF8FC00, .NEON, {}},
|
||||
{.URSRA, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F103400, 0xFFF0FC00, .NEON, {}},
|
||||
{.URSRA, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F103400, 0xFFF0FC00, .NEON, {}},
|
||||
{.URSRA, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F203400, 0xFFE0FC00, .NEON, {}},
|
||||
{.URSRA, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F203400, 0xFFE0FC00, .NEON, {}},
|
||||
{.URSRA, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F403400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
.SRI = {
|
||||
{.SRI, {.V_8B, .V_8B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F084400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SRI, {.V_16B, .V_16B, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F084400, 0xFFF8FC00, .NEON, {}},
|
||||
{.SRI, {.V_4H, .V_4H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F104400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SRI, {.V_8H, .V_8H, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F104400, 0xFFF0FC00, .NEON, {}},
|
||||
{.SRI, {.V_2S, .V_2S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x2F204400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SRI, {.V_4S, .V_4S, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F204400, 0xFFE0FC00, .NEON, {}},
|
||||
{.SRI, {.V_2D, .V_2D, .VEC_SHIFT, .NONE}, {.VD, .VN, .NEON_SHR_IMM, .NONE}, 0x6F404400, 0xFFC0FC00, .NEON, {}},
|
||||
},
|
||||
// SPECGEN:END
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -231,6 +231,51 @@ for _, fam in ipairs(DIFF) do
|
||||
sections[#sections+1] = "\t// Advanced SIMD "..fam.title..".\n" .. table.concat(blk, "\n")
|
||||
end
|
||||
|
||||
-- ---- NEON shift-by-immediate ----------------------------------------------
|
||||
-- <mnem> Vd.T, Vn.T, #shift. immh:immb carries element size + amount; mask is
|
||||
-- derived empirically by also varying the shift (canon = operand bits 0, other =
|
||||
-- all shift bits set). The encoder/decoder compute the amount (NEON_SHL/SHR_IMM).
|
||||
-- Element width in bits for each arrangement suffix.
local ESIZE = {
	["8B"] = 8, ["16B"] = 8,
	["4H"] = 16, ["8H"] = 16,
	["2S"] = 32, ["4S"] = 32,
	["1D"] = 64, ["2D"] = 64,
}
-- Arrangements probed for every shift-by-immediate mnemonic, in output order.
local SHIFT_ARR = { "8B", "16B", "4H", "8H", "2S", "4S", "2D" }

--- Build the encoding-table entry for one shift-by-immediate mnemonic.
-- For each arrangement in SHIFT_ARR three instructions are assembled through
-- `word`: a canonical one, one with the registers changed to v31, and one with
-- the shift amount at its other extreme. Every bit that differs between the
-- canonical word and either variant is treated as an operand bit and cleared
-- from the mask.
-- NOTE(review): `word` is defined outside this view; it is presumably the
-- assembler probe returning the 32-bit encoding or a falsy value on failure.
-- @param mnem  Mnemonic enum name written into the table (e.g. "SHL_V").
-- @param llvm  Assembler spelling of the instruction (e.g. "shl").
-- @param dir   "L" for left shifts; anything else is handled as a right shift.
-- @return The formatted `.MNEM = { ... },` block, or nil if no row was produced.
local function emit_shift(mnem, llvm, dir)
	local is_left = (dir == "L")
	local enc_tok = ".NEON_SHR_IMM"
	if is_left then
		enc_tok = ".NEON_SHL_IMM"
	end

	local rows = {}
	for _, arr in ipairs(SHIFT_ARR) do
		local esize = ESIZE[arr]
		-- canon is the amount assembled for the reference word; other is the
		-- opposite extreme of the amount range.
		local canon, other
		if is_left then
			canon, other = 0, esize - 1
		else
			canon, other = esize, 1
		end

		local suffix = ARR[arr].asm
		local function asm_line(reg, shift)
			return string.format("%s v%d.%s, v%d.%s, #%d", llvm, reg, suffix, reg, suffix, shift)
		end

		local bits = word(asm_line(0, canon))
		local reg_variant = word(asm_line(31, canon))
		local shift_variant = word(asm_line(0, other))

		if bits and reg_variant and shift_variant then
			-- Bits that changed in either variant belong to operands.
			local varying = bit.bor(bit.bxor(bits, reg_variant), bit.bxor(bits, shift_variant))
			local mask = bit.band(bit.bnot(varying), 0xFFFFFFFF)
			local vt = ARR[arr].vt
			rows[#rows+1] = string.format(
				"\t\t{.%s, {.%s, .%s, .VEC_SHIFT, .NONE}, {.VD, .VN, %s, .NONE}, 0x%s, 0x%s, .NEON, {}},",
				mnem, vt, vt, enc_tok, bit.tohex(bits):upper(), bit.tohex(mask):upper())
			n_forms = n_forms + 1
		else
			-- Record the arrangement that could not be assembled.
			skips[#skips+1] = mnem.." ."..arr
		end
	end

	if #rows == 0 then
		return nil
	end
	n_mnem = n_mnem + 1
	return string.format("\t.%s = {\n%s\n\t},", mnem, table.concat(rows, "\n"))
end
|
||||
-- Shift-by-immediate mnemonics to generate: { enum name, assembler spelling,
-- direction }, where direction is "L" (left) or "R" (right).
local SHIFTS = {
	{ "SHL_V", "shl", "L" },
	{ "SLI", "sli", "L" },
	{ "SQSHLU", "sqshlu", "L" },
	{ "SQSHL_V", "sqshl", "L" },
	{ "SSHR", "sshr", "R" },
	{ "USHR", "ushr", "R" },
	{ "SRSHR", "srshr", "R" },
	{ "URSHR", "urshr", "R" },
	{ "SSRA", "ssra", "R" },
	{ "USRA", "usra", "R" },
	{ "SRSRA", "srsra", "R" },
	{ "URSRA", "ursra", "R" },
	{ "SRI", "sri", "R" },
}

-- Emit one table block per mnemonic and append them as a single section.
-- Mnemonics for which emit_shift returns nil are left out.
do
	local blocks = {}
	for i = 1, #SHIFTS do
		local spec = SHIFTS[i]
		local mnem, llvm, dir = spec[1], spec[2], spec[3]
		local entry = emit_shift(mnem, llvm, dir)
		if entry then
			blocks[#blocks+1] = entry
		end
	end
	local header = "\t// Advanced SIMD shift by immediate.\n"
	sections[#sections+1] = header .. table.concat(blocks, "\n")
end
|
||||
|
||||
-- ---- splice into the SoT ---------------------------------------------------
|
||||
local region = "\t// SPECGEN:BEGIN\n" .. table.concat(sections, "\n\n") .. "\n\t// SPECGEN:END"
|
||||
local fh = assert(io.open(TABLE, "r")); local src = fh:read("*a"); fh:close()
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user