// rexcode · Brendan Punsky (dotbmp@github), original author package rexcode_arm32 import "core:rexcode/isa" // ============================================================================= // AArch32 DECODER // ============================================================================= // // Variable-length: A32 = 4 bytes, T16 = 2 bytes, T32 = 4 bytes (two halfwords). // The decoder takes a Mode parameter telling it whether to interpret bytes // as A32 or T32. In T32 mode, the first halfword's top 5 bits indicate // whether the instruction is 16 or 32 bits (top in {11101, 11110, 11111} = 32). // // Operation: // // PASS 1 - For each instruction, read the appropriate halfword(s), match // against ENCODING_TABLE entries for the active mode, build the // Instruction with extracted operands. Branch operands are emitted // as RELATIVE with the absolute target byte offset; the post-pass // converts these into Label_Definitions via infer_labels_from_branches. // // Like riscv, decoding is structured as a linear-scan by mnemonic with a // `(word & mask) == bits` test. For performance, future work could build a // decode index table (see arm64/decoding_tables.odin pattern). Instruction_Info :: struct { offset: u32, decode_entry: u16, _: u16, } #assert(size_of(Instruction_Info) == 8) decode :: proc( data: []u8, relocs: []Relocation, instructions: ^[dynamic]Instruction, inst_info: ^[dynamic]Instruction_Info, label_defs: ^[dynamic]Label_Definition, errors: ^[dynamic]Error, mode: Mode = .A32, ) -> (byte_count: u32, ok: bool) { n_bytes := u32(len(data)) if mode == .T32 { n_bytes = n_bytes & ~u32(1) } else { n_bytes = n_bytes & ~u32(3) } errors_start := u32(len(errors)) pending_branches: [dynamic]isa.Branch_Target defer delete(pending_branches) for byte_count < n_bytes { word: u32 ilen: u32 = 4 if mode == .A32 { if byte_count + 4 > n_bytes { break } word = read_u32_le(data, byte_count) } else { // T32: 16 or 32 bit hword_hi := read_u16_le(data, byte_count) top5 := (hword_hi >> 11) & 0x1F if top5 == 0x1D || top5 == 0x1E || top5 == 0x1F { if byte_count + 4 > n_bytes { break } hword_lo := read_u16_le(data, byte_count + 2) // Pack: bits = low_halfword | (high_halfword << 16) word = u32(hword_lo) | (u32(hword_hi) << 16) ilen = 4 } else { word = u32(hword_hi) ilen = 2 } } inst: Instruction info: Instruction_Info info.offset = byte_count if !find_and_decode(word, mode, ilen, &inst, &info) { append(errors, Error{inst_idx = byte_count, code = .INVALID_OPCODE}) inst = Instruction{mnemonic = .INVALID, length = u8(ilen), mode = mode} } else { inst.length = u8(ilen) inst.mode = mode // Pull condition out of bits 31:28 for conditional A32 entries. // The find_and_decode helper has already set inst.cond using the // mask-based test (mask bits 31:28 == 0 ⇒ conditional). See // encoding_types.odin for the rationale. inst_idx := u32(len(instructions)) for slot in 0..= 0 { append(&pending_branches, isa.Branch_Target{ inst_idx = inst_idx, op_idx = slot, target = u32(op.relative), }) } } } append(instructions, inst) append(inst_info, info) byte_count += ilen } isa.infer_labels_from_branches(pending_branches[:], byte_count, label_defs, relocs) ok = u32(len(errors)) == errors_start return } // ============================================================================= // Decode dispatch via primary-opcode index tables (generated) // ============================================================================= @(private="file") find_and_decode :: proc(word: u32, mode: Mode, ilen: u32, inst: ^Instruction, info: ^Instruction_Info) -> bool { range: Decode_Index if mode == .A32 { range = DECODE_INDEX_A32[(word >> 20) & 0xFF] } else if ilen == 4 { // Try the T32 secondary index first (sub-bucketed by bits 24:20). primary := (word >> 25) & 0x7F sub := (word >> 20) & 0x1F sub_range := DECODE_INDEX_T32_SUB[primary * DECODE_T32_SUB_BUCKETS + sub] if sub_range.count > 0 { range = sub_range } else { range = DECODE_INDEX_T32[primary] } } else { range = DECODE_INDEX_T16[(word >> 10) & 0x3F] } if range.count == 0 { return false } base := int(range.start) cnt := int(range.count) for i in 0..> 28) == 0 { inst.cond = u8((word >> 28) & 0xF) } else { inst.cond = 14 // AL / unconditional } if e.flags.sets_flags { inst.flags.sets_flags = true } for _, k in e.enc { if e.enc[k] == .NONE { continue } op := unpack_operand(word, e.enc[k], e.ops[k]) inst.ops[k] = op inst.operand_count = u8(k + 1) } // For slots where the form declares an Operand_Type but the wire // encoding is .NONE (e.g. MOVW's imm16, SVC's imm24, T16 LDR's imm5), // fabricate a zero-valued operand of the right Operand_Kind so the // re-encode shape match succeeds. The encoder won't pack anything for // those slots since enc is .NONE; carrying a placeholder lets the // user-facing API still show the slot. for _, k in e.enc { if e.enc[k] != .NONE { continue } if e.ops[k] == .NONE { continue } inst.ops[k] = default_operand_for(e.ops[k]) inst.operand_count = u8(k + 1) } return true } return false } // Produce a zero-valued operand of the kind implied by an Operand_Type. Used // when a form's wire encoding is .NONE for a slot but the operand type slot // is non-NONE; we want a placeholder of the right kind so the encoder's // shape_matches accepts the re-encode. @(private="file") default_operand_for :: proc(ot: Operand_Type) -> Operand { #partial switch ot { case .GPR, .GPR_NOPC, .GPR_NOSP, .GPR_LOW, .GPR_SHIFTED, .GPR_RSR: return op_reg(R0) case .GPR_LIST: return op_reg_list(0) case .SPR: return op_reg(S0) case .DPR: return op_reg(D0) case .QPR: return op_reg(Q0) case .DPR_ELEM: return op_reg(D0) case .QPR_ELEM: return op_reg(Q0) case .SPR_ELEM: return op_reg(S0) case .SPR_LIST, .DPR_LIST: return op_reg_list(0) case .QPR_MVE_LIST: return op_reg_list(0) case .VPR, .QPR_MVE: return op_reg(Q0) case .MEM: return op_mem(mem_imm(R0, 0)) case .REL24, .REL24_T32, .REL20, .REL11, .REL8, .REL_LDR_LITERAL: return op_rel_offset(0) } return op_imm(0) } // ============================================================================= // Operand un-packers (inverse of pack_operand in encoder.odin) // ============================================================================= @(private="file") unpack_operand :: proc(word: u32, enc: Operand_Encoding, ot: Operand_Type) -> Operand { switch enc { case .NONE, .IMPL: return op_imm(0) // ---- GPR slots ---- case .RD, .RT_A32, .RA_A32, .RDLO_A32: return op_reg(Register(REG_GPR | u16((word >> 12) & 0xF))) case .RT2_A32: return op_reg(Register(REG_GPR | u16((word >> 16) & 0xF))) case .RN_A32, .RDHI_A32: reg := Register(REG_GPR | u16((word >> 16) & 0xF)) // Some atomics/exclusives use .MEM as the operand type with .RN_A32 as // the wire encoding (the assembly is `INSN Rd, Rt, [Rn]` — Rn appears // inside brackets). Wrap into a bare Memory operand so the encoder // shape match accepts it on roundtrip. if ot == .MEM { return op_mem(mem_imm(reg, 0)) } return op_reg(reg) case .RM_A32: reg := Register(REG_GPR | u16(word & 0xF)) #partial switch ot { case .GPR_RSR: // Register-shifted register: Rs in bits 11..8, shift type in 6..5, // bit 4 = 1. We map the 2-bit shift type onto the .{LSL,LSR,ASR, // ROR}_REG markers so the encoder shape-match can distinguish // imm-shift from reg-shift. Rs is stored in shift_amt. st_bits := (word >> 5) & 0x3 st := Shift_Type(u8(st_bits) + u8(Shift_Type.LSL_REG)) rs := u8((word >> 8) & 0xF) return Operand{reg = reg, kind = .REGISTER, size = 4, shift_type = st, shift_amt = rs, cond = 14} case .GPR_SHIFTED: // Imm-shift: amount in bits 11..7, type in 6..5, bit 4 = 0. st := Shift_Type((word >> 5) & 0x3) amt := u8((word >> 7) & 0x1F) if st == .ROR && amt == 0 { return op_reg_shifted(reg, .RRX, 0) } if st == .LSL && amt == 0 { return op_reg(reg) } return op_reg_shifted(reg, st, amt) } return op_reg(reg) case .RS_A32: return op_reg(Register(REG_GPR | u16((word >> 8) & 0xF))) case .RD_T32: return op_reg(Register(REG_GPR | u16((word >> 8) & 0xF))) case .RN_T32: reg := Register(REG_GPR | u16((word >> 16) & 0xF)) if ot == .MEM { return op_mem(mem_imm(reg, 0)) } return op_reg(reg) case .RM_T32: return op_reg(Register(REG_GPR | u16(word & 0xF))) case .RT_T32, .RA_T32: return op_reg(Register(REG_GPR | u16((word >> 12) & 0xF))) case .RT2_T32: return op_reg(Register(REG_GPR | u16((word >> 8) & 0xF))) case .RD_T16_LO: return op_reg(Register(REG_GPR | u16(word & 0x7))) case .RM_T16_LO, .RN_T16_LO: return op_reg(Register(REG_GPR | u16((word >> 3) & 0x7))) case .RD_T16_HI: rd := (word & 0x7) | ((word >> 7) & 1) << 3 return op_reg(Register(REG_GPR | u16(rd))) case .RM_T16_HI: return op_reg(Register(REG_GPR | u16((word >> 3) & 0xF))) // ---- Modified immediates (decoded to their effective 32-bit value) ---- case .A32_IMM_MOD, .A32_IMM12_ROT: return op_imm(i64(decode_a32_modimm(word & 0xFFF))) case .T32_IMM_MOD: i_bit := (word >> 26) & 1 imm3 := (word >> 12) & 0x7 imm8 := word & 0xFF f12 := (i_bit << 11) | (imm3 << 8) | imm8 return op_imm(i64(decode_t32_modimm(f12))) // ---- A32 immediates ---- case .A32_IMM12: return op_imm(i64(word & 0xFFF)) case .A32_IMM_SHIFT: return op_imm(i64((word >> 7) & 0x1F)) case .A32_SHIFT_TYPE: return op_imm(i64((word >> 5) & 0x3)) case .A32_IMM24: // Ambiguous: A32_IMM24 is used both for branch displacements (B/BL, // shape REL24) and for the 24-bit `imm` of SVC (shape IMM). Use the // form's operand type to disambiguate. if ot == .IMM { return op_imm(i64(word & 0xFFFFFF)) } v := i32(word & 0xFFFFFF) if v & 0x800000 != 0 { v |= -0x1000000 } return op_rel_offset(i64(v << 2)) case .A32_IMM4: return op_imm(i64(word & 0xF)) case .A32_IMM4_ROTATE: return op_imm(i64((word >> 8) & 0xF)) case .A32_IMM5_LSB: return op_imm(i64((word >> 7) & 0x1F)) case .A32_IMM5_W: return op_imm(i64((word >> 16) & 0x1F)) case .A32_REG_LIST: return op_reg_list(u16(word & 0xFFFF)) // ---- VFP/NEON split fields ---- case .VD_S: n := ((word >> 12) & 0xF) << 1 | ((word >> 22) & 1) return op_reg(Register(REG_SPR | u16(n))) case .VN_S: n := ((word >> 16) & 0xF) << 1 | ((word >> 7) & 1) return op_reg(Register(REG_SPR | u16(n))) case .VM_S: n := (word & 0xF) << 1 | ((word >> 5) & 1) return op_reg(Register(REG_SPR | u16(n))) case .VD_D: n := ((word >> 22) & 1) << 4 | ((word >> 12) & 0xF) return op_reg(Register(REG_DPR | u16(n))) case .VN_D: n := ((word >> 7) & 1) << 4 | ((word >> 16) & 0xF) return op_reg(Register(REG_DPR | u16(n))) case .VM_D: n := ((word >> 5) & 1) << 4 | (word & 0xF) return op_reg(Register(REG_DPR | u16(n))) case .NEON_VM_SCALAR16: lane := ((word >> 5) & 1) << 1 | ((word >> 3) & 1) return op_dpr_lane(Register(REG_DPR | u16(word & 0x7)), u8(lane)) case .NEON_VM_SCALAR32: return op_dpr_lane(Register(REG_DPR | u16(word & 0xF)), u8((word >> 5) & 1)) case .VMOV_LANE_8, .VMOV_LANE_16, .VMOV_LANE_32: n := ((word >> 7) & 1) << 4 | ((word >> 16) & 0xF) lane: u32 = 0 if enc == .VMOV_LANE_8 { lane = ((word >> 21) & 1) << 2 | ((word >> 6) & 1) << 1 | ((word >> 5) & 1) } else if enc == .VMOV_LANE_16 { lane = ((word >> 21) & 1) << 1 | ((word >> 6) & 1) } else { lane = (word >> 21) & 1 } return op_dpr_lane(Register(REG_DPR | u16(n)), u8(lane)) case .MVE_ROT_HCADD: return op_imm(((word >> 12) & 1) == 1 ? 270 : 90) case .MVE_ROT_CMLA: return op_imm(i64((word >> 23) & 0x3) * 90) case .VN_Q_MVE: return op_reg(Register(REG_QPR | u16((word >> 17) & 0x7))) case .VM_Q_MVE: return op_reg(Register(REG_QPR | u16((word >> 1) & 0x7))) case .VD_Q: n := (((word >> 22) & 1) << 4 | ((word >> 12) & 0xF)) >> 1 return op_reg(Register(REG_QPR | u16(n))) case .VN_Q: n := (((word >> 7) & 1) << 4 | ((word >> 16) & 0xF)) >> 1 return op_reg(Register(REG_QPR | u16(n))) case .VM_Q: n := (((word >> 5) & 1) << 4 | (word & 0xF)) >> 1 return op_reg(Register(REG_QPR | u16(n))) // ---- Memory ---- case .MEM_IMM12_OFFSET: base := Register(REG_GPR | u16((word >> 16) & 0xF)) u_bit := (word >> 23) & 1 disp := i32(word & 0xFFF) if u_bit == 0 { disp = -disp } return op_mem(mem_imm(base, disp)) case .MEM_IMM8_OFFSET: base := Register(REG_GPR | u16((word >> 16) & 0xF)) u_bit := (word >> 23) & 1 disp := i32(((word >> 8) & 0xF) << 4 | (word & 0xF)) if u_bit == 0 { disp = -disp } return op_mem(mem_imm(base, disp)) case .MEM_REG_OFFSET: base := Register(REG_GPR | u16((word >> 16) & 0xF)) idx := Register(REG_GPR | u16(word & 0xF)) sign: i8 = (word >> 23) & 1 != 0 ? 1 : -1 return op_mem(mem_reg(base, idx, sign)) case .MEM_DOUBLEREG: base := Register(REG_GPR | u16((word >> 16) & 0xF)) idx := Register(REG_GPR | u16(word & 0xF)) return op_mem(mem_reg(base, idx)) // ---- Misc ---- case .BARRIER_TYPE: return op_imm(i64(word & 0xF)) case .HINT_FIELD: return op_imm(i64(word & 0xFF)) case .IT_MASK: return op_imm(i64(word & 0xFF)) case .CPS_IFLAGS: return op_imm(i64(word & 0x1FF)) case .PSR_FIELD_MASK: return op_imm(i64(decode_psr_field(word))) case .SYSM_FIELD: return op_imm(i64(word & 0xFF)) case .COPROC_NUM_FIELD: return op_imm(i64((word >> 8) & 0xF)) case .COPROC_OPC1_FIELD: return op_imm(i64((word >> 20) & 0xF)) case .COPROC_OPC2_FIELD: return op_imm(i64((word >> 5) & 0x7)) case .COPROC_CRN_FIELD: return op_reg(Register(REG_COPROC | u16((word >> 16) & 0xF))) case .COPROC_CRM_FIELD: return op_reg(Register(REG_COPROC | u16(word & 0xF))) case .COPROC_OPC_MCRR: return op_imm(i64((word >> 4) & 0xF)) case .NEON_CMODE: return op_imm(i64((word >> 8) & 0xF)) case .NEON_OP_BIT: return op_imm(i64((word >> 5) & 1)) case .NEON_IMM8_ABCDEFGH: // Reconstruct abcdefgh from scattered wire bits, then apply cmode/op // expansion via decode_neon_modimm. a := extract_neon_modimm_abcdefgh(word) cmode := (word >> 8) & 0xF op := (word >> 5) & 1 return op_imm(i64(decode_neon_modimm(a, cmode, op))) case .VFP_IMM8: a := ((word >> 16) & 0xF) << 4 | (word & 0xF) return op_imm(i64(decode_vfp_imm8_f32(a))) case .VFP_S_LIST, .VFP_D_LIST: // Register count is encoded in bits 7..0 (count, not bitmask). Wrap // as a REG_LIST so the encoder's shape_match for SPR_LIST/DPR_LIST // accepts it; the encoder packs the same 8-bit count back out. return op_reg_list(u16(word & 0xFF)) // ---- Branch fields (decoded into RELATIVE) ---- case .BRANCH_24: v := i32(word & 0xFFFFFF) if v & 0x800000 != 0 { v |= -0x1000000 } return op_rel_offset(i64(v << 2)) case .BRANCH_24_T32: // T32 25-bit signed scattered: S | I1 | I2 | imm10 | imm11 s := (word >> 26) & 1 j1 := (word >> 13) & 1 j2 := (word >> 11) & 1 imm10 := (word >> 16) & 0x3FF imm11 := word & 0x7FF i1 := j1 ~ (s ~ 1) i2 := j2 ~ (s ~ 1) v := (s << 23) | (i1 << 22) | (i2 << 21) | (imm10 << 11) | imm11 if v & (1 << 23) != 0 { v |= ~u32(0xFFFFFF) } return op_rel_offset(i64(i32(v) << 1)) case .BRANCH_20_T32: // T32 21-bit signed for B s := (word >> 26) & 1 j1 := (word >> 13) & 1 j2 := (word >> 11) & 1 imm6 := (word >> 16) & 0x3F imm11 := word & 0x7FF v := (s << 19) | (j1 << 18) | (j2 << 17) | (imm6 << 11) | imm11 if v & (1 << 19) != 0 { v |= ~u32(0xFFFFF) } return op_rel_offset(i64(i32(v) << 1)) case .BRANCH_11_T16: v := word & 0x7FF if v & 0x400 != 0 { v |= ~u32(0x7FF) } return op_rel_offset(i64(i32(v) << 1)) case .BRANCH_8_T16: v := word & 0xFF if v & 0x80 != 0 { v |= ~u32(0xFF) } return op_rel_offset(i64(i32(v) << 1)) case .BRANCH_CBZ: i_bit := (word >> 9) & 1 imm5 := (word >> 3) & 0x1F v := (i_bit << 6) | (imm5 << 1) return op_rel_offset(i64(v)) // ---- ARMv8.1-M Branch Future ---- case .BF_BOFF: imm4 := (word >> 23) & 0xF // hw0[10:7] return op_rel_offset(i64(imm4) << 1) case .BF_BLOC: j := (word >> 11) & 1 // hw1[11] imm10 := (word >> 1) & 0x3FF // hw1[10:1] val := (imm10 << 1) | j return op_rel_offset(i64(val) << 1) case .BF_RM: return op_reg(Register(REG_GPR | u16((word >> 16) & 0xF))) case .BFCSEL_COND: return op_imm(i64((word >> 18) & 0xF)) // ---- Saturate / bit field ---- case .SAT_IMM5, .SAT_IMM5_T32, .BFI_MSB: return op_imm(i64((word >> 16) & 0x1F)) case .BFI_LSB, .BFI_LSB_T32: return op_imm(i64((word >> 7) & 0x1F)) case .NEON_SHIFT_IMM6: return op_imm(i64((word >> 16) & 0x3F)) case .NEON_SHIFT_IMM3: return op_imm(i64((word >> 16) & 0x7)) // ---- A32 RS shift (Rs register in bits 11:8) ---- case .A32_RS_SHIFT: return op_reg(Register(REG_GPR | u16((word >> 8) & 0xF))) // ---- A32 COND ---- case .A32_COND_FIELD: return op_imm(i64((word >> 28) & 0xF)) // ---- MVE Q-registers (3-bit indexed) ---- case .QD_MVE: n := (word >> 13) & 0x7 return op_reg(Register(REG_QPR | u16(n))) case .QN_MVE: n := ((word >> 17) & 0x7) | (((word >> 7) & 1) << 3) return op_reg(Register(REG_QPR | u16(n & 0x7))) case .QM_MVE: n := (word >> 1) & 0x7 return op_reg(Register(REG_QPR | u16(n))) case .MVE_SIZE_FIELD: return op_imm(i64((word >> 20) & 0x3)) case .MVE_VPT_MASK_FIELD: return op_imm(i64((word >> 13) & 0xF)) case .MVE_LOOP_IMM: // ARMv8.1-M loop-branch imm11 sign-extended v := (word >> 1) & 0x7FF if v & 0x400 != 0 { v |= ~u32(0x7FF) } return op_rel_offset(i64(i32(v) << 1)) case .CDE_COPROC_FIELD: return op_imm(i64((word >> 8) & 0x7)) case .CDE_IMM_FIELD: return op_imm(i64(word & 0x7F)) case .CDE_ACC_FIELD: return op_imm(i64((word >> 16) & 1)) case .V8M_TT_AT_BITS: return op_imm(i64((word >> 6) & 0x3)) // ---- Memory addressing flavours ---- // PRE_INDEX and POST_INDEX wrap MEM_IMM12_OFFSET: same field layout but the // addressing mode flag is set differently. We reconstruct the full Memory // operand here (base, disp, sign, mode). case .MEM_PRE_INDEX: base := Register(REG_GPR | u16((word >> 16) & 0xF)) u_bit := (word >> 23) & 1 disp := i32(word & 0xFFF) if u_bit == 0 { disp = -disp } return op_mem(mem_imm_pre(base, disp)) case .MEM_POST_INDEX: base := Register(REG_GPR | u16((word >> 16) & 0xF)) u_bit := (word >> 23) & 1 disp := i32(word & 0xFFF) if u_bit == 0 { disp = -disp } return op_mem(mem_imm_post(base, disp)) case .MEM_LITERAL: // PC-relative literal load: U bit + 12-bit signed disp u_bit := (word >> 23) & 1 disp := i32(word & 0xFFF) if u_bit == 0 { disp = -disp } return op_rel_offset(i64(disp)) case: return op_imm(0) } } // ----------------------------------------------------------------------------- // Buffer-Sizing Helpers (let callers pre-size so the decode hot path never // reallocates; allocates no new buffers -- only the caller's arrays grow). // ----------------------------------------------------------------------------- // Instruction-count ceiling for `data` (A32 is 4 bytes, Thumb 2; minimum 2). @(require_results) decode_max_instruction_count :: #force_inline proc "contextless" (data: []u8) -> int { return len(data) / 2 } // Typical-case estimate of the instruction count for `data`. @(require_results) decode_estimate_instruction_count :: #force_inline proc "contextless" (data: []u8) -> int { return len(data) / 4 + 8 } // Pre-size the caller's decode output arrays for `data` (reserves on top of any // existing elements; nil to skip; exact=true for the ceiling, else the estimate). decode_reserve :: proc(instructions: ^[dynamic]Instruction, inst_info: ^[dynamic]Instruction_Info, label_defs: ^[dynamic]Label_Definition, data: []u8, exact: bool = false) { n := exact ? decode_max_instruction_count(data) : decode_estimate_instruction_count(data) if instructions != nil { reserve(instructions, len(instructions) + n) } if inst_info != nil { reserve(inst_info, len(inst_info) + n) } if label_defs != nil { reserve(label_defs, len(label_defs) + n) } }