mirror of
https://github.com/odin-lang/Odin.git
synced 2026-06-19 08:32:33 +00:00
rexcode/arm32: MVE VMLSV/VMLSVA (correct 3-bit Q regs); drop placeholders
Implement VMLSV/VMLSVA (MVE multiply-subtract reduce) properly by adding two operand encodings, VN_Q_MVE (Qn at bits 19:17) and VM_Q_MVE (Qm at bits 3:1) -- the actual 3-bit MVE Q-register fields -- and placing Rd at bits 15:12 (RDLO_A32). The earlier collision came from reusing the 4-bit VN_Q (bits 19:16) and RD_T32 (bits 11:8) encodings, which put the fields in the wrong place; the output is now byte-exact against llvm-mc with distinct Qn/Qm/Rd. Also drop three placeholder/redundant Mnemonic enum entries: VRINT and VPRINT (not real instructions -- llvm rejects a bare 'vrint', and VPRINT is a printf-like debug pseudo-op), and VRSHL_MVE (its own comment marks it as a placeholder; 'vrshl q,q,q' already decodes via VRSHL's MVE form). All 600 tests pass, and verification matches llvm-mc.
This commit is contained in:
@@ -356,6 +356,10 @@ unpack_operand :: proc(word: u32, enc: Operand_Encoding, ot: Operand_Type) -> Op
|
||||
return op_imm(((word >> 12) & 1) == 1 ? 270 : 90)
|
||||
case .MVE_ROT_CMLA:
|
||||
return op_imm(i64((word >> 23) & 0x3) * 90)
|
||||
case .VN_Q_MVE:
|
||||
return op_reg(Register(REG_QPR | u16((word >> 17) & 0x7)))
|
||||
case .VM_Q_MVE:
|
||||
return op_reg(Register(REG_QPR | u16((word >> 1) & 0x7)))
|
||||
case .VD_Q:
|
||||
n := (((word >> 22) & 1) << 4 | ((word >> 12) & 0xF)) >> 1
|
||||
return op_reg(Register(REG_QPR | u16(n)))
|
||||
|
||||
@@ -440,6 +440,10 @@ pack_operand_inline :: #force_inline proc(
|
||||
return (u32(op.immediate) == 270 ? 1 : 0) << 12
|
||||
case .MVE_ROT_CMLA:
|
||||
return ((u32(op.immediate) / 90) & 0x3) << 23
|
||||
case .VN_Q_MVE:
|
||||
return (u32(reg_hw(op.reg)) & 0x7) << 17
|
||||
case .VM_Q_MVE:
|
||||
return (u32(reg_hw(op.reg)) & 0x7) << 1
|
||||
case .VFP_IMM8:
|
||||
// Run the VFP 8-bit float encoder; the user supplies the wire-format
|
||||
// 32-bit float bit pattern (for F32). The encoder finds the abcdefgh.
|
||||
|
||||
@@ -316,6 +316,9 @@ Operand_Encoding :: enum u8 {
|
||||
// MVE_ROT_HCADD: #90/#270 -> bit 12; MVE_ROT_CMLA: #0/90/180/270 -> bits 24:23
|
||||
MVE_ROT_HCADD,
|
||||
MVE_ROT_CMLA,
|
||||
// MVE 3-bit Q registers (Q0..Q7): Qn at bits 19:17, Qm at bits 3:1.
|
||||
VN_Q_MVE,
|
||||
VM_Q_MVE,
|
||||
VFP_IMM8, // VFP immediate (VMOV.F32/F64 #imm)
|
||||
NEON_IMM8_ABCDEFGH, // bits 18-16 (abc) + bits 3-0 (defgh)
|
||||
NEON_CMODE, // bits 11-8 (cmode for VMOV/VMVN immediate)
|
||||
|
||||
@@ -1298,6 +1298,10 @@ inst_vmlav_r_q_q :: #force_inline proc "contextless" (dst: Regis
|
||||
// emit_vmlav_r_q_q builds a VMLAV instruction via inst_vmlav_r_q_q(dst, src, src2)
// and appends it to the dynamic array that `instructions` points to.
emit_vmlav_r_q_q :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) {
	encoded := inst_vmlav_r_q_q(dst, src, src2)
	append(instructions, encoded)
}
|
||||
// inst_vmlava_r_q_q returns an Instruction value describing a three-operand
// VMLAVA in T32 mode. The operands are wrapped with op_reg in the order
// dst, src, src2; the fourth operand slot is left zero-valued.
// NOTE(review): cond = 14 is presumably the "always" condition and length = 4
// presumably a byte count -- confirm against the Instruction definition.
inst_vmlava_r_q_q :: #force_inline proc "contextless" (dst: Register, src: Register, src2: Register) -> Instruction {
	return Instruction{
		mnemonic      = .VMLAVA,
		operand_count = 3,
		mode          = .T32,
		cond          = 14,
		length        = 4,
		ops           = {op_reg(dst), op_reg(src), op_reg(src2), {}},
	}
}
|
||||
// emit_vmlava_r_q_q builds a VMLAVA instruction via inst_vmlava_r_q_q(dst, src, src2)
// and appends it to the dynamic array that `instructions` points to.
emit_vmlava_r_q_q :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) {
	encoded := inst_vmlava_r_q_q(dst, src, src2)
	append(instructions, encoded)
}
|
||||
// inst_vmlsv_r_q_q returns an Instruction value describing a three-operand
// VMLSV in T32 mode. The operands are wrapped with op_reg in the order
// dst, src, src2; the fourth operand slot is left zero-valued.
// NOTE(review): cond = 14 is presumably the "always" condition and length = 4
// presumably a byte count -- confirm against the Instruction definition.
inst_vmlsv_r_q_q :: #force_inline proc "contextless" (dst: Register, src: Register, src2: Register) -> Instruction {
	return Instruction{
		mnemonic      = .VMLSV,
		operand_count = 3,
		mode          = .T32,
		cond          = 14,
		length        = 4,
		ops           = {op_reg(dst), op_reg(src), op_reg(src2), {}},
	}
}
|
||||
// emit_vmlsv_r_q_q builds a VMLSV instruction via inst_vmlsv_r_q_q(dst, src, src2)
// and appends it to the dynamic array that `instructions` points to.
emit_vmlsv_r_q_q :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) {
	encoded := inst_vmlsv_r_q_q(dst, src, src2)
	append(instructions, encoded)
}
|
||||
// inst_vmlsva_r_q_q returns an Instruction value describing a three-operand
// VMLSVA in T32 mode. The operands are wrapped with op_reg in the order
// dst, src, src2; the fourth operand slot is left zero-valued.
// NOTE(review): cond = 14 is presumably the "always" condition and length = 4
// presumably a byte count -- confirm against the Instruction definition.
inst_vmlsva_r_q_q :: #force_inline proc "contextless" (dst: Register, src: Register, src2: Register) -> Instruction {
	return Instruction{
		mnemonic      = .VMLSVA,
		operand_count = 3,
		mode          = .T32,
		cond          = 14,
		length        = 4,
		ops           = {op_reg(dst), op_reg(src), op_reg(src2), {}},
	}
}
|
||||
// emit_vmlsva_r_q_q builds a VMLSVA instruction via inst_vmlsva_r_q_q(dst, src, src2)
// and appends it to the dynamic array that `instructions` points to.
emit_vmlsva_r_q_q :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) {
	encoded := inst_vmlsva_r_q_q(dst, src, src2)
	append(instructions, encoded)
}
|
||||
// inst_vcmul_q_q_q returns an Instruction value describing a three-operand
// VCMUL in T32 mode. The operands are wrapped with op_reg in the order
// dst, src, src2; the fourth operand slot is left zero-valued.
// NOTE(review): cond = 14 is presumably the "always" condition and length = 4
// presumably a byte count -- confirm against the Instruction definition.
inst_vcmul_q_q_q :: #force_inline proc "contextless" (dst: Register, src: Register, src2: Register) -> Instruction {
	return Instruction{
		mnemonic      = .VCMUL,
		operand_count = 3,
		mode          = .T32,
		cond          = 14,
		length        = 4,
		ops           = {op_reg(dst), op_reg(src), op_reg(src2), {}},
	}
}
|
||||
// emit_vcmul_q_q_q builds a VCMUL instruction via inst_vcmul_q_q_q(dst, src, src2)
// and appends it to the dynamic array that `instructions` points to.
emit_vcmul_q_q_q :: #force_inline proc(instructions: ^[dynamic]Instruction, dst: Register, src: Register, src2: Register) {
	encoded := inst_vcmul_q_q_q(dst, src, src2)
	append(instructions, encoded)
}
|
||||
// inst_vhcadd_q_q_q returns an Instruction value describing a three-operand
// VHCADD in T32 mode. The operands are wrapped with op_reg in the order
// dst, src, src2; the fourth operand slot is left zero-valued.
// NOTE(review): cond = 14 is presumably the "always" condition and length = 4
// presumably a byte count -- confirm against the Instruction definition.
inst_vhcadd_q_q_q :: #force_inline proc "contextless" (dst: Register, src: Register, src2: Register) -> Instruction {
	return Instruction{
		mnemonic      = .VHCADD,
		operand_count = 3,
		mode          = .T32,
		cond          = 14,
		length        = 4,
		ops           = {op_reg(dst), op_reg(src), op_reg(src2), {}},
	}
}
|
||||
@@ -2551,6 +2555,10 @@ inst_vmlav :: inst_vmlav_r_q_q
|
||||
emit_vmlav :: emit_vmlav_r_q_q
|
||||
inst_vmlava :: inst_vmlava_r_q_q
|
||||
emit_vmlava :: emit_vmlava_r_q_q
|
||||
inst_vmlsv :: inst_vmlsv_r_q_q
|
||||
emit_vmlsv :: emit_vmlsv_r_q_q
|
||||
inst_vmlsva :: inst_vmlsva_r_q_q
|
||||
emit_vmlsva :: emit_vmlsva_r_q_q
|
||||
inst_vcmul :: inst_vcmul_q_q_q
|
||||
emit_vcmul :: emit_vcmul_q_q_q
|
||||
inst_vhcadd :: inst_vhcadd_q_q_q
|
||||
|
||||
@@ -306,7 +306,7 @@ Mnemonic :: enum u16 {
|
||||
SHA256H, SHA256H2, SHA256SU0, SHA256SU1,
|
||||
|
||||
// -- VFP rounding (ARMv8 FEAT_FP) ----------------------------------------
|
||||
VRINT, VJCVT, // VJCVT: F64-to-S32 with FPSCR.RM rounding
|
||||
VJCVT, // VJCVT: F64-to-S32 with FPSCR.RM rounding
|
||||
|
||||
// -- Dot Product (FEAT_DotProd) ------------------------------------------
|
||||
VSDOT, VUDOT,
|
||||
@@ -465,7 +465,6 @@ Mnemonic :: enum u16 {
|
||||
// Bit reverse + shifts unique to MVE
|
||||
VBRSR, // bit reverse with shift right
|
||||
VSHLC, // shift left with carry
|
||||
VRSHL_MVE, // (placeholder if needed; usually VRSHL)
|
||||
VDDUP, // decrement and duplicate
|
||||
VIDUP, // increment and duplicate
|
||||
VDWDUP, // decrement-wrap and duplicate
|
||||
@@ -501,7 +500,6 @@ Mnemonic :: enum u16 {
|
||||
VQRDMLSDH, VQRDMLSDHX,
|
||||
|
||||
// Misc
|
||||
VPRINT, // printf-like debug op (rare)
|
||||
VHCADD_SAT, // (rarely used)
|
||||
VCMLA_MVE, // (MVE form; VCMLA already exists)
|
||||
|
||||
|
||||
@@ -3695,6 +3695,10 @@ ENCODING_TABLE := #partial [Mnemonic][]Encoding{
|
||||
// mask (the MVE convention); the complex ops encode the rotation immediate.
|
||||
.VHCADD_SAT = { {.VHCADD_SAT, {.QPR, .QPR, .QPR, .IMM}, {.VD_Q, .VN_Q, .VM_Q, .MVE_ROT_HCADD}, 0xEE000F00, 0xFFE10FF1, .MVE_INT, .T32, {thumb32=true, cond_in_28=false}} },
|
||||
.VCMLA_MVE = { {.VCMLA_MVE, {.QPR, .QPR, .QPR, .IMM}, {.VD_Q, .VN_Q, .VM_Q, .MVE_ROT_CMLA}, 0xFC200840, 0xFE611FF1, .MVE_FP, .T32, {thumb32=true, cond_in_28=false}} },
|
||||
// MVE multiply-subtract reduce (Rd at 15:12, Qn at 19:17, Qm at 3:1 -- the
|
||||
// proper 3-bit MVE Q fields, .s16 form). VMLSVA accumulates (bit 5).
|
||||
.VMLSV = { {.VMLSV, {.GPR, .QPR, .QPR, .NONE}, {.RDLO_A32, .VN_Q_MVE, .VM_Q_MVE, .NONE}, 0xEEF00E01, 0xFFF10FF1, .MVE_INT, .T32, {thumb32=true, cond_in_28=false}} },
|
||||
.VMLSVA = { {.VMLSVA, {.GPR, .QPR, .QPR, .NONE}, {.RDLO_A32, .VN_Q_MVE, .VM_Q_MVE, .NONE}, 0xEEF00E21, 0xFFF10FF1, .MVE_INT, .T32, {thumb32=true, cond_in_28=false}} },
|
||||
|
||||
// SPECGEN:BEGIN
|
||||
.VADDL = {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -8,7 +8,7 @@ package rexcode_arm32_generated
|
||||
import lib "../.."
|
||||
|
||||
@(rodata)
|
||||
ENCODE_FORMS := [1673]lib.Encoding{
|
||||
ENCODE_FORMS := [1675]lib.Encoding{
|
||||
// .AND
|
||||
{ .AND, {.GPR,.GPR,.IMM_MOD,.NONE}, {.RD,.RN_A32,.A32_IMM_MOD,.NONE}, 0x02000000, 0x0FE00000, .BASE, .A32, {} },
|
||||
{ .AND, {.GPR,.GPR,.GPR_SHIFTED,.NONE}, {.RD,.RN_A32,.RM_A32,.NONE}, 0x00000000, 0x0FE00010, .BASE, .A32, {} },
|
||||
@@ -2172,6 +2172,10 @@ ENCODE_FORMS := [1673]lib.Encoding{
|
||||
{ .VMLAV, {.GPR,.QPR,.QPR,.NONE}, {.RD_T32,.VN_Q,.VM_Q,.NONE}, 0xEEB00F00, 0xEFB10F51, .MVE_INT, .T32, {thumb32=true} },
|
||||
// .VMLAVA
|
||||
{ .VMLAVA, {.GPR,.QPR,.QPR,.NONE}, {.RD_T32,.VN_Q,.VM_Q,.NONE}, 0xEEB00F20, 0xEFB10F51, .MVE_INT, .T32, {thumb32=true} },
|
||||
// .VMLSV
|
||||
{ .VMLSV, {.GPR,.QPR,.QPR,.NONE}, {.RDLO_A32,.VN_Q_MVE,.VM_Q_MVE,.NONE}, 0xEEF00E01, 0xFFF10FF1, .MVE_INT, .T32, {thumb32=true} },
|
||||
// .VMLSVA
|
||||
{ .VMLSVA, {.GPR,.QPR,.QPR,.NONE}, {.RDLO_A32,.VN_Q_MVE,.VM_Q_MVE,.NONE}, 0xEEF00E21, 0xFFF10FF1, .MVE_INT, .T32, {thumb32=true} },
|
||||
// .VCMUL
|
||||
{ .VCMUL, {.QPR,.QPR,.QPR,.NONE}, {.VD_Q,.VN_Q,.VM_Q,.NONE}, 0xEE300E00, 0xEFB10F51, .MVE_FP, .T32, {thumb32=true} },
|
||||
// .VHCADD
|
||||
@@ -2722,7 +2726,6 @@ ENCODE_RUNS := [lib.Mnemonic]lib.Encode_Run{
|
||||
.SHA256H2 = { 1365, 1},
|
||||
.SHA256SU0 = { 1366, 1},
|
||||
.SHA256SU1 = { 1367, 1},
|
||||
.VRINT = { 1368, 0},
|
||||
.VJCVT = { 1368, 1},
|
||||
.VSDOT = { 1369, 2},
|
||||
.VUDOT = { 1371, 2},
|
||||
@@ -2873,76 +2876,74 @@ ENCODE_RUNS := [lib.Mnemonic]lib.Encode_Run{
|
||||
.VRMLSLDAVHAX = { 1603, 1},
|
||||
.VMLAV = { 1604, 1},
|
||||
.VMLAVA = { 1605, 1},
|
||||
.VMLSV = { 1606, 0},
|
||||
.VMLSVA = { 1606, 0},
|
||||
.VCMUL = { 1606, 1},
|
||||
.VHCADD = { 1607, 1},
|
||||
.VBRSR = { 1608, 1},
|
||||
.VSHLC = { 1609, 1},
|
||||
.VRSHL_MVE = { 1610, 0},
|
||||
.VDDUP = { 1610, 1},
|
||||
.VIDUP = { 1611, 1},
|
||||
.VDWDUP = { 1612, 1},
|
||||
.VIWDUP = { 1613, 1},
|
||||
.VMOVNB = { 1614, 1},
|
||||
.VMOVNT = { 1615, 1},
|
||||
.VQMOVNB = { 1616, 1},
|
||||
.VQMOVNT = { 1617, 1},
|
||||
.VQMOVUNB = { 1618, 1},
|
||||
.VQMOVUNT = { 1619, 1},
|
||||
.VSHLLB = { 1620, 1},
|
||||
.VSHLLT = { 1621, 1},
|
||||
.VMULLB = { 1622, 1},
|
||||
.VMULLT = { 1623, 1},
|
||||
.VMLALB = { 1624, 1},
|
||||
.VMLALT = { 1625, 1},
|
||||
.VMLSLB = { 1626, 1},
|
||||
.VMLSLT = { 1627, 1},
|
||||
.VSHRNB = { 1628, 1},
|
||||
.VSHRNT = { 1629, 1},
|
||||
.VRSHRNB = { 1630, 1},
|
||||
.VRSHRNT = { 1631, 1},
|
||||
.VQSHRNB = { 1632, 1},
|
||||
.VQSHRNT = { 1633, 1},
|
||||
.VQRSHRNB = { 1634, 1},
|
||||
.VQRSHRNT = { 1635, 1},
|
||||
.VQSHRUNB = { 1636, 1},
|
||||
.VQSHRUNT = { 1637, 1},
|
||||
.VQRSHRUNB = { 1638, 1},
|
||||
.VQRSHRUNT = { 1639, 1},
|
||||
.VMOV_Q_R = { 1640, 1},
|
||||
.VMOV_R_Q = { 1641, 1},
|
||||
.VMOV_2GPR_Q = { 1642, 1},
|
||||
.VQDMLADH = { 1643, 1},
|
||||
.VQDMLADHX = { 1644, 1},
|
||||
.VQDMLSDH = { 1645, 1},
|
||||
.VQDMLSDHX = { 1646, 1},
|
||||
.VQRDMLADH = { 1647, 1},
|
||||
.VQRDMLADHX = { 1648, 1},
|
||||
.VQRDMLSDH = { 1649, 1},
|
||||
.VQRDMLSDHX = { 1650, 1},
|
||||
.VPRINT = { 1651, 0},
|
||||
.VHCADD_SAT = { 1651, 1},
|
||||
.VCMLA_MVE = { 1652, 1},
|
||||
.VLDRB = { 1653, 1},
|
||||
.VLDRH = { 1654, 1},
|
||||
.VLDRW = { 1655, 1},
|
||||
.VLDRD = { 1656, 1},
|
||||
.VSTRB = { 1657, 1},
|
||||
.VSTRH = { 1658, 1},
|
||||
.VSTRW = { 1659, 1},
|
||||
.VSTRD = { 1660, 1},
|
||||
.VLD20 = { 1661, 1},
|
||||
.VLD21 = { 1662, 1},
|
||||
.VLD40 = { 1663, 1},
|
||||
.VLD41 = { 1664, 1},
|
||||
.VLD42 = { 1665, 1},
|
||||
.VLD43 = { 1666, 1},
|
||||
.VST20 = { 1667, 1},
|
||||
.VST21 = { 1668, 1},
|
||||
.VST40 = { 1669, 1},
|
||||
.VST41 = { 1670, 1},
|
||||
.VST42 = { 1671, 1},
|
||||
.VST43 = { 1672, 1},
|
||||
._COUNT = { 1673, 0},
|
||||
.VMLSV = { 1606, 1},
|
||||
.VMLSVA = { 1607, 1},
|
||||
.VCMUL = { 1608, 1},
|
||||
.VHCADD = { 1609, 1},
|
||||
.VBRSR = { 1610, 1},
|
||||
.VSHLC = { 1611, 1},
|
||||
.VDDUP = { 1612, 1},
|
||||
.VIDUP = { 1613, 1},
|
||||
.VDWDUP = { 1614, 1},
|
||||
.VIWDUP = { 1615, 1},
|
||||
.VMOVNB = { 1616, 1},
|
||||
.VMOVNT = { 1617, 1},
|
||||
.VQMOVNB = { 1618, 1},
|
||||
.VQMOVNT = { 1619, 1},
|
||||
.VQMOVUNB = { 1620, 1},
|
||||
.VQMOVUNT = { 1621, 1},
|
||||
.VSHLLB = { 1622, 1},
|
||||
.VSHLLT = { 1623, 1},
|
||||
.VMULLB = { 1624, 1},
|
||||
.VMULLT = { 1625, 1},
|
||||
.VMLALB = { 1626, 1},
|
||||
.VMLALT = { 1627, 1},
|
||||
.VMLSLB = { 1628, 1},
|
||||
.VMLSLT = { 1629, 1},
|
||||
.VSHRNB = { 1630, 1},
|
||||
.VSHRNT = { 1631, 1},
|
||||
.VRSHRNB = { 1632, 1},
|
||||
.VRSHRNT = { 1633, 1},
|
||||
.VQSHRNB = { 1634, 1},
|
||||
.VQSHRNT = { 1635, 1},
|
||||
.VQRSHRNB = { 1636, 1},
|
||||
.VQRSHRNT = { 1637, 1},
|
||||
.VQSHRUNB = { 1638, 1},
|
||||
.VQSHRUNT = { 1639, 1},
|
||||
.VQRSHRUNB = { 1640, 1},
|
||||
.VQRSHRUNT = { 1641, 1},
|
||||
.VMOV_Q_R = { 1642, 1},
|
||||
.VMOV_R_Q = { 1643, 1},
|
||||
.VMOV_2GPR_Q = { 1644, 1},
|
||||
.VQDMLADH = { 1645, 1},
|
||||
.VQDMLADHX = { 1646, 1},
|
||||
.VQDMLSDH = { 1647, 1},
|
||||
.VQDMLSDHX = { 1648, 1},
|
||||
.VQRDMLADH = { 1649, 1},
|
||||
.VQRDMLADHX = { 1650, 1},
|
||||
.VQRDMLSDH = { 1651, 1},
|
||||
.VQRDMLSDHX = { 1652, 1},
|
||||
.VHCADD_SAT = { 1653, 1},
|
||||
.VCMLA_MVE = { 1654, 1},
|
||||
.VLDRB = { 1655, 1},
|
||||
.VLDRH = { 1656, 1},
|
||||
.VLDRW = { 1657, 1},
|
||||
.VLDRD = { 1658, 1},
|
||||
.VSTRB = { 1659, 1},
|
||||
.VSTRH = { 1660, 1},
|
||||
.VSTRW = { 1661, 1},
|
||||
.VSTRD = { 1662, 1},
|
||||
.VLD20 = { 1663, 1},
|
||||
.VLD21 = { 1664, 1},
|
||||
.VLD40 = { 1665, 1},
|
||||
.VLD41 = { 1666, 1},
|
||||
.VLD42 = { 1667, 1},
|
||||
.VLD43 = { 1668, 1},
|
||||
.VST20 = { 1669, 1},
|
||||
.VST21 = { 1670, 1},
|
||||
.VST40 = { 1671, 1},
|
||||
.VST41 = { 1672, 1},
|
||||
.VST42 = { 1673, 1},
|
||||
.VST43 = { 1674, 1},
|
||||
._COUNT = { 1675, 0},
|
||||
}
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
Before Width: | Height: | Size: 3.3 KiB After Width: | Height: | Size: 3.3 KiB |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user