mirror of
https://github.com/odin-lang/Odin.git
synced 2026-06-19 16:42:33 +00:00
New VMOV_LANE_8/16/32 encodings: Dd at bits 19:16+bit7, lane bits per element size (.8 = bit21:bit6:bit5 with bit22 size marker; .16 = bit21:bit6 with bit5 marker; .32 = bit21). Verify round-trips all three sizes; spot-checked .8 byte-exact incl. max lane; 600 tests green.
770 lines
28 KiB
Odin
770 lines
28 KiB
Odin
// rexcode · Brendan Punsky (dotbmp@github), original author
|
|
|
|
package rexcode_arm32
|
|
|
|
// =============================================================================
|
|
// AArch32 ENCODER
|
|
// =============================================================================
|
|
//
|
|
// Two-pass design (mirrors riscv/encoder.odin):
|
|
//
|
|
// PASS 1 - For each Instruction, find the first matching Encoding form
|
|
// (by Mnemonic / mode / operand-shape), pack operand bits onto
|
|
// the form's static `bits`, and emit either 2 or 4 bytes
|
|
// depending on inst_size_from_bits. Branch operands emit
|
|
// Relocation entries that PASS 2 resolves.
|
|
// PASS 1.5 - Rewrite label_defs[] from instruction index to byte offset
|
|
// (required because T16 and T32 instructions mix 2/4-byte sizes).
|
|
// PASS 2 - Walk the pending relocations and patch in scattered branch
|
|
// offsets, dropping any whose label resolved.
|
|
//
|
|
// PC for arm32 is (current_inst_addr + 8) in A32 and (+4) in T32; the
|
|
// resolver subtracts that automatically.
|
|
|
|
// Size in bytes of the largest instruction this encoder emits.
MAX_INST_SIZE :: 4

// Returns the number of code bytes to reserve for `n` instructions: 4 bytes
// per instruction. `encode` rejects any buffer smaller than this.
encode_max_code_size :: #force_inline proc "contextless" (n: int) -> int {
	return MAX_INST_SIZE * n
}

// Returns the relocation capacity suggested for `n` instructions, which is
// simply `n`.
encode_max_relocation_count :: #force_inline proc "contextless" (n: int) -> int {
	count := n
	return count
}
|
|
|
|
// Encodes `instructions` into `code` and (optionally) resolves label
// references.
//
// Inputs:
//   instructions - instructions to encode, in emission order.
//   label_defs   - on entry each defined label holds an instruction index; on
//                  a normal return each holds the byte offset of that
//                  instruction instead (labels pointing past the last
//                  instruction become LABEL_UNDEFINED). Not rewritten when
//                  pass 1 fails early.
//   code         - output buffer; must hold at least 4 bytes per instruction.
//   relocs       - receives relocations produced during pass 1; entries that
//                  the resolver handles are removed again in pass 2.
//   errors       - receives every Error raised while encoding/resolving.
//   resolve      - when false, stop after the label rewrite and leave all
//                  relocations pending.
//   base_address - forwarded to the relocation resolver.
//
// Returns the number of bytes written and whether no new errors were appended
// during this call.
encode :: proc(
	instructions: []Instruction,
	label_defs:   []Label_Definition,
	code:         []u8,
	relocs:       ^[dynamic]Relocation,
	errors:       ^[dynamic]Error,
	resolve:      bool = true,
	base_address: u64 = 0,
) -> Result {
	count := len(instructions)

	// Refuse to start unless the worst case (4 bytes per instruction) fits.
	if len(code) < count * 4 {
		append(errors, Error{inst_idx = 0, code = .BUFFER_OVERFLOW})
		return Result{byte_count = 0, success = false}
	}

	// Remember where this call's errors and relocations begin so that entries
	// already present in the caller's arrays are left alone.
	first_error := u32(len(errors))
	first_reloc := u32(len(relocs))

	// Byte offset of every instruction, needed later to translate labels.
	offsets := make([]u32, count, context.temp_allocator)
	cursor: u32 = 0

	// ---- PASS 1: encode each instruction and emit its bytes -----------------
	for idx in 0..<count {
		offsets[idx] = cursor
		current := &instructions[idx]

		bits, size, encoded := encode_one_inline(current, cursor, u16(idx), relocs, errors)
		if !encoded {
			return Result{byte_count = cursor, success = false}
		}

		switch {
		case size == 2:
			write_u16_le(code, cursor, u16(bits))
		case current.mode == .T32:
			// Wide T32: the halfword held in bits 31:16 goes out first, then
			// the one in bits 15:0; each is little-endian on its own.
			write_u16_le(code, cursor, u16(bits >> 16))
			write_u16_le(code, cursor + 2, u16(bits))
		case:
			write_u32_le(code, cursor, bits)
		}

		cursor += u32(size)
	}

	// ---- PASS 1.5: label instruction index -> byte offset -------------------
	for &def in label_defs {
		if def == LABEL_UNDEFINED { continue }
		target := int(u32(def))
		if target >= count {
			def = LABEL_UNDEFINED
		} else {
			def = Label_Definition(offsets[target])
		}
	}

	if !resolve {
		return Result{byte_count = cursor, success = u32(len(errors)) == first_error}
	}

	// ---- PASS 2: resolve relocations, compacting the unresolved ones --------
	total := u32(len(relocs))
	kept := first_reloc
	for scan in first_reloc..<total {
		entry := relocs[scan]
		if !resolve_relocation_inline(code, label_defs, &entry, base_address, errors) {
			// Still pending: slide it down over any removed entries.
			if kept != scan { relocs[kept] = entry }
			kept += 1
		}
	}
	if kept != total { resize(relocs, int(kept)) }

	return Result{byte_count = cursor, success = u32(len(errors)) == first_error}
}
|
|
|
|
// =============================================================================
|
|
// Encode one instruction
|
|
// =============================================================================
|
|
|
|
// Reports whether encoding form `f` can be used for `inst`: same mode, same
// encoded length when the caller requested one (`want_len != 0`), matching
// operand shapes, matching S-flag, and matching memory addressing mode.
@(private="file")
form_accepts :: #force_inline proc(inst: ^Instruction, f: ^Encoding, want_len: u8) -> bool {
	if f.mode != inst.mode { return false }
	if want_len != 0 && inst_size_from_bits(f.bits, f.mode) != want_len { return false }
	if !encoding_matches_inline(inst, f) { return false }
	if inst.flags.sets_flags != f.flags.sets_flags { return false }
	return mem_mode_matches(inst, f)
}

// Encodes a single instruction.
//
// Picks an encoding form for `inst`, starts from the form's fixed bits, adds
// the condition field for conditional A32 forms, and ORs in the bits produced
// for each operand slot. Operand packing may append relocations to `relocs`.
//
// Returns the packed word, its length in bytes as reported by
// `inst_size_from_bits`, and `ok`. On failure an Error is appended to
// `errors` and the result is (0, 0, false).
@(private="file")
encode_one_inline :: #force_inline proc(
	inst:     ^Instruction,
	pc:       u32,
	inst_idx: u16,
	relocs:   ^[dynamic]Relocation,
	errors:   ^[dynamic]Error,
) -> (word: u32, ilen: u8, ok: bool) {
	if inst.mnemonic == .INVALID {
		append(errors, Error{inst_idx = u32(inst_idx), code = .INVALID_MNEMONIC})
		return 0, 0, false
	}

	forms := encoding_forms(inst.mnemonic)
	if len(forms) == 0 {
		append(errors, Error{inst_idx = u32(inst_idx), code = .INVALID_MNEMONIC})
		return 0, 0, false
	}

	// A non-zero inst.length restricts the search to forms of that size. T32
	// mode hosts both 2-byte and 4-byte forms with overlapping shapes; without
	// the restriction a wide instruction would silently encode as narrow.
	want_len: u8 = inst.length
	chosen: ^Encoding

	// Form-id hint: a non-zero form_id is a 1-based index into `forms` naming
	// the exact form to prefer (0 means "no hint"). It is honoured only if the
	// form still passes every check, which disambiguates forms sharing a shape
	// but differing in fixed bits (e.g. NEON size variants).
	if inst.form_id != 0 && int(inst.form_id) - 1 < len(forms) {
		hinted := &forms[inst.form_id - 1]
		if form_accepts(inst, hinted, want_len) {
			chosen = hinted
		}
	}

	// Otherwise take the first acceptable form in table order.
	if chosen == nil {
		for &candidate in forms {
			if form_accepts(inst, &candidate, want_len) {
				chosen = &candidate
				break
			}
		}
	}

	if chosen == nil {
		append(errors, Error{inst_idx = u32(inst_idx), code = .NO_MATCHING_ENCODING})
		return 0, 0, false
	}

	word = chosen.bits

	// Conditional A32 forms leave bits 31:28 out of their mask; for those the
	// instruction's condition code replaces the top nibble. The structural
	// mask test is the source of truth here rather than a per-form flag.
	if chosen.mode == .A32 && (chosen.mask >> 28) == 0 {
		word = (word & 0x0FFFFFFF) | (u32(inst.cond) << 28)
	}

	// Pack the four operand slots in order, skipping unused ones.
	for slot in 0..<4 {
		if chosen.enc[slot] == .NONE { continue }
		word |= pack_operand_inline(&inst.ops[slot], chosen.enc[slot], pc, inst_idx, relocs, chosen)
	}

	return word, inst_size_from_bits(chosen.bits, chosen.mode), true
}
|
|
|
|
// =============================================================================
|
|
// Shape matching: do the Operand kinds line up with the form's Operand_Type?
|
|
// =============================================================================
|
|
|
|
// Reports whether `s` is one of the register-shifted-register shift kinds
// (LSL_REG, LSR_REG, ASR_REG, ROR_REG).
@(private="file")
is_rsr_shift_type :: #force_inline proc "contextless" (s: Shift_Type) -> bool {
	#partial switch s {
	case .LSL_REG, .LSR_REG, .ASR_REG, .ROR_REG:
		return true
	}
	return false
}
|
|
|
|
// Maps a register-shifted-register shift kind to its 2-bit type field:
// LSL_REG -> 0, LSR_REG -> 1, ASR_REG -> 2, ROR_REG -> 3. Any other shift
// kind yields 0.
@(private="file")
rsr_type_bits :: #force_inline proc "contextless" (s: Shift_Type) -> u32 {
	if s == .LSR_REG { return 1 }
	if s == .ASR_REG { return 2 }
	if s == .ROR_REG { return 3 }
	// LSL_REG and every non-RSR shift kind.
	return 0
}
|
|
|
|
// The Operand_Type shape does not distinguish OFFSET, PRE_INDEX and
// POST_INDEX addressing, so every .MEM form shape-matches a memory operand
// equally. This check compares each memory operand's addressing mode (and
// whether it has an index register) against the form's operand encoding, so
// that e.g. a [Rn,#x]! operand selects the writeback form rather than the
// plain offset form, and vice versa.
//
// Returns false as soon as one memory operand disagrees with its slot's
// encoding; slots whose encoding is not one of the memory encodings listed
// below are not constrained.
@(private="file")
mem_mode_matches :: #force_inline proc "contextless" (inst: ^Instruction, form: ^Encoding) -> bool {
	for slot in 0..<4 {
		operand := &inst.ops[slot]
		if operand.kind != .MEMORY { continue }

		mode := operand.mem.mode

		// There is no dedicated "no register" sentinel: an index equal to
		// Register(0) (the zero value, i.e. R0) is read as "no index". An
		// operand that genuinely indexes by R0 is therefore indistinguishable
		// from an immediate-offset operand here — a known, accepted ambiguity.
		indexed := operand.mem.index != Register(0)

		compatible := true
		#partial switch form.enc[slot] {
		case .MEM_IMM12_OFFSET, .MEM_IMM8_OFFSET:
			compatible = mode == .OFFSET && !indexed
		case .MEM_REG_OFFSET, .MEM_DOUBLEREG:
			compatible = mode == .OFFSET && indexed
		case .MEM_PRE_INDEX:
			compatible = mode == .PRE_INDEX
		case .MEM_POST_INDEX:
			compatible = mode == .POST_INDEX
		}

		if !compatible { return false }
	}
	return true
}
|
|
|
|
// Reports whether all four operands of `inst` fit the operand types declared
// by `form`. Slots are checked in order and the first mismatch ends the check.
@(private="file")
encoding_matches_inline :: #force_inline proc "contextless" (inst: ^Instruction, form: ^Encoding) -> bool {
	for slot in 0..<4 {
		if !operand_matches_inline(&inst.ops[slot], form.ops[slot]) {
			return false
		}
	}
	return true
}
|
|
|
|
// Reports whether operand `op` has a kind (and, for registers, a register
// class / shift kind) compatible with the form operand type `ot`. Operand
// types not listed below never match.
@(private="file")
operand_matches_inline :: #force_inline proc "contextless" (op: ^Operand, ot: Operand_Type) -> bool {
	kind := op.kind

	#partial switch ot {
	case .NONE:
		return kind == .NONE

	// ---- general-purpose registers ----
	case .GPR, .GPR_NOPC, .GPR_NOSP, .GPR_LOW:
		return kind == .REGISTER && is_gpr(op.reg)
	case .GPR_SHIFTED:
		// A GPR carrying a shift that is not register-shifted-register.
		if kind != .REGISTER || !is_gpr(op.reg) { return false }
		return op.shift_type != .NONE && !is_rsr_shift_type(op.shift_type)
	case .GPR_RSR:
		if kind != .REGISTER || !is_gpr(op.reg) { return false }
		return is_rsr_shift_type(op.shift_type)
	case .GPR_LIST:
		return kind == .REG_LIST

	// ---- floating-point / vector registers ----
	case .SPR, .SPR_ELEM:
		return kind == .REGISTER && is_spr(op.reg)
	case .DPR, .DPR_ELEM:
		return kind == .REGISTER && is_dpr(op.reg)
	case .QPR, .QPR_ELEM, .VPR, .QPR_MVE:
		return kind == .REGISTER && is_qpr(op.reg)
	case .SPR_LIST, .DPR_LIST:
		// Either a real register list or a single S/D register.
		if kind == .REG_LIST { return true }
		return kind == .REGISTER && (is_spr(op.reg) || is_dpr(op.reg))
	case .QPR_MVE_LIST:
		if kind == .REG_LIST { return true }
		return kind == .REGISTER && is_qpr(op.reg)

	// ---- immediates (including condition codes and PSR field masks) ----
	case .IMM, .IMM_MOD, .IMM_T32_MOD, .IMM12, .IMM5, .IMM5_W,
	     .IMM4, .IMM4_SAT, .IMM8, .IMM3, .IMM_HINT, .IMM_BARRIER,
	     .IMM_ENDIAN, .IMM_IFLAGS, .IMM_BANKED, .IMM_SYSM,
	     .IMM_COPROC, .IMM_COPROC_OP, .NEON_IMM, .IMM16_LO_HI,
	     .COND, .PSR_FIELD:
		return kind == .IMMEDIATE

	// ---- label-relative operands ----
	case .REL24, .REL24_T32, .REL20, .REL11, .REL8, .REL_LDR_LITERAL:
		return kind == .RELATIVE

	case .MEM:
		// Usually a Memory operand; PC-relative literal loads are carried as
		// RELATIVE operands so the relocation pass can patch the label
		// offset, so both kinds are accepted.
		return kind == .MEMORY || kind == .RELATIVE

	case .COPROC_REG, .COPROC_NUM:
		return kind == .REGISTER || kind == .IMMEDIATE

	case .MVE_VPT_MASK, .MVE_VCTP_SIZE, .MVE_LOOP_TGT, .CDE_COPROC,
	     .CDE_IMM, .CDE_VFP_REG:
		return kind == .IMMEDIATE || kind == .REGISTER || kind == .RELATIVE
	}

	return false
}
|
|
|
|
// =============================================================================
|
|
// Operand packer
|
|
// =============================================================================
|
|
|
|
// Produces the instruction bits contributed by one operand.
//
// Inputs:
//   op       - the operand to pack.
//   enc      - where/how the form wants this operand placed.
//   pc       - byte offset of the instruction being encoded; recorded as the
//              `offset` of any relocation emitted here.
//   inst_idx - instruction index, recorded in emitted relocations.
//   relocs   - receives a Relocation for label-relative encodings.
//   form     - the selected encoding form (not consulted by this procedure).
//
// Returns bits to OR into the instruction word. Label-relative encodings
// return 0 and append a relocation instead; the resolver patches the offset
// in later. Bit positions refer to the packed 32-bit word; for wide T32
// instructions `encode` emits bits 31:16 as the first halfword and bits 15:0
// as the second.
@(private="file")
pack_operand_inline :: #force_inline proc(
	op:       ^Operand,
	enc:      Operand_Encoding,
	pc:       u32,
	inst_idx: u16,
	relocs:   ^[dynamic]Relocation,
	form:     ^Encoding,
) -> u32 {
	switch enc {
	case .NONE, .IMPL:
		return 0

	// ---- A32 GPR slots ----
	case .RD:       return (u32(reg_hw(op.reg)) & 0xF) << 12
	case .RN_A32:   return (u32(reg_hw(op.reg)) & 0xF) << 16
	case .RM_A32:
		reg := u32(reg_hw(op.reg)) & 0xF
		st := op.shift_type
		// Register-shifted register: type in 6..5, Rs in 11..8, bit 4 = 1.
		// The shift register number is carried in op.shift_amt.
		if is_rsr_shift_type(st) {
			rs := u32(op.shift_amt) & 0xF
			return reg | (rs << 8) | (rsr_type_bits(st) << 5) | (u32(1) << 4)
		}
		// RRX is encoded as ROR with a zero shift amount.
		if st == .RRX { return reg | (u32(Shift_Type.ROR) & 0x3) << 5 }
		if st == .NONE { return reg }
		if op.shift_amt == 0 && st == .LSL { return reg } // LSL #0 == naked register
		amt := u32(op.shift_amt) & 0x1F
		return reg | (amt << 7) | (u32(st) & 0x3) << 5
	case .RS_A32:   return (u32(reg_hw(op.reg)) & 0xF) << 8
	case .RT_A32:   return (u32(reg_hw(op.reg)) & 0xF) << 12
	case .RT2_A32:  return (u32(reg_hw(op.reg)) & 0xF) << 16
	case .RA_A32:   return (u32(reg_hw(op.reg)) & 0xF) << 12
	case .RDLO_A32: return (u32(reg_hw(op.reg)) & 0xF) << 12
	case .RDHI_A32: return (u32(reg_hw(op.reg)) & 0xF) << 16

	// ---- T32 GPR slots (positions within the packed 32-bit word) ----
	case .RD_T32:  return (u32(reg_hw(op.reg)) & 0xF) << 8
	case .RN_T32:  return (u32(reg_hw(op.reg)) & 0xF) << 16
	case .RM_T32:  return u32(reg_hw(op.reg)) & 0xF
	case .RT_T32:  return (u32(reg_hw(op.reg)) & 0xF) << 12
	case .RT2_T32: return (u32(reg_hw(op.reg)) & 0xF) << 8
	case .RA_T32:  return (u32(reg_hw(op.reg)) & 0xF) << 12

	// ---- T16 GPR slots ----
	case .RD_T16_LO: return u32(reg_hw(op.reg)) & 0x7
	case .RM_T16_LO: return (u32(reg_hw(op.reg)) & 0x7) << 3
	case .RN_T16_LO: return (u32(reg_hw(op.reg)) & 0x7) << 3
	case .RD_T16_HI:
		// hi-reg form: rd[3] at bit 7, rd[2:0] at bits 2:0
		v := u32(reg_hw(op.reg)) & 0xF
		return (v & 0x7) | ((v >> 3) & 1) << 7
	case .RM_T16_HI:
		// hi-reg form: rm at bits 6:3 (4 bits)
		return (u32(reg_hw(op.reg)) & 0xF) << 3

	// ---- Modified-immediate (A32 + T32) ----
	case .A32_IMM_MOD, .A32_IMM12_ROT:
		// Search for a (rotate, value) pair representing the full 32-bit
		// constant; if none exists, assume the caller supplied an already
		// encoded 12-bit field and pass its low 12 bits through.
		v, ok := encode_a32_modimm(u32(op.immediate))
		if !ok {
			return u32(op.immediate) & 0xFFF
		}
		return v
	case .T32_IMM_MOD:
		// Find the i:imm3:imm8 field (12 bits) that expands to the constant,
		// or fall back to the raw low 12 bits, then scatter it.
		f12, ok := encode_t32_modimm(u32(op.immediate))
		if !ok {
			f12 = u32(op.immediate) & 0xFFF
		}
		i_bit := (f12 >> 11) & 1
		imm3  := (f12 >> 8) & 0x7
		imm8  := f12 & 0xFF
		return (i_bit << 26) | (imm3 << 12) | imm8

	// ---- A32 immediate field placements ----
	case .A32_IMM12:       return u32(op.immediate) & 0xFFF
	case .A32_IMM_SHIFT:   return (u32(op.immediate) & 0x1F) << 7
	case .A32_SHIFT_TYPE:  return (u32(op.immediate) & 0x3) << 5
	case .A32_RS_SHIFT:    return (u32(reg_hw(op.reg)) & 0xF) << 8
	case .A32_IMM24, .BRANCH_24:
		// A32 branch target: resolved later through a relocation.
		append(relocs, Relocation{
			offset = pc, label_id = u32(op.relative),
			type = .BRANCH_A32_24, size = 4, inst_idx = inst_idx,
		})
		return 0
	case .A32_IMM4:        return u32(op.immediate) & 0xF
	case .A32_IMM4_ROTATE: return (u32(op.immediate) & 0xF) << 8
	case .A32_IMM5_LSB:    return (u32(op.immediate) & 0x1F) << 7
	case .A32_IMM5_W:      return (u32(op.immediate) & 0x1F) << 16
	case .A32_COND_FIELD:  return (u32(op.immediate) & 0xF) << 28
	case .A32_REG_LIST:    return u32(op.immediate) & 0xFFFF

	// ---- VFP / NEON register-field split encoders --------------------------
	case .VD_S:
		// S<n>: n[4:1] at bits 15:12, n[0] at bit 22
		n := u32(reg_hw(op.reg)) & 0x1F
		return ((n >> 1) & 0xF) << 12 | (n & 1) << 22
	case .VN_S:
		// S<n>: n[4:1] at bits 19:16, n[0] at bit 7
		n := u32(reg_hw(op.reg)) & 0x1F
		return ((n >> 1) & 0xF) << 16 | (n & 1) << 7
	case .VM_S:
		// S<n>: n[4:1] at bits 3:0, n[0] at bit 5
		n := u32(reg_hw(op.reg)) & 0x1F
		return ((n >> 1) & 0xF) | (n & 1) << 5
	case .VD_D, .VD_Q:
		// D<n>: n[3:0] at bits 15:12, n[4] at bit 22. A Q register is
		// encoded through its even D alias: Q<n> -> D<2n>.
		n := u32(reg_hw(op.reg)) & 0x1F
		if reg_class(op.reg) == REG_QPR { n = (n & 0xF) * 2 }
		return (n & 0xF) << 12 | ((n >> 4) & 1) << 22
	case .VN_D, .VN_Q:
		// n[3:0] at bits 19:16, n[4] at bit 7; Q<n> -> D<2n>.
		n := u32(reg_hw(op.reg)) & 0x1F
		if reg_class(op.reg) == REG_QPR { n = (n & 0xF) * 2 }
		return (n & 0xF) << 16 | ((n >> 4) & 1) << 7
	case .VM_D, .VM_Q:
		// n[3:0] at bits 3:0, n[4] at bit 5; Q<n> -> D<2n>.
		n := u32(reg_hw(op.reg)) & 0x1F
		if reg_class(op.reg) == REG_QPR { n = (n & 0xF) * 2 }
		return (n & 0xF) | ((n >> 4) & 1) << 5
	case .NEON_VM_SCALAR16:
		// Dm in D0..D7 at bits 2:0; lane = bit5(lane[1]) : bit3(lane[0]).
		return (u32(reg_hw(op.reg)) & 0x7) | ((u32(op.lane) >> 1) & 1) << 5 | (u32(op.lane) & 1) << 3
	case .NEON_VM_SCALAR32:
		// Dm in D0..D15 at bits 3:0; lane = bit5.
		return (u32(reg_hw(op.reg)) & 0xF) | (u32(op.lane) & 1) << 5
	case .VMOV_LANE_8, .VMOV_LANE_16, .VMOV_LANE_32:
		// Dd: n[3:0] at bits 19:16, n[4] at bit 7; the lane index is
		// scattered differently per element size.
		n := u32(reg_hw(op.reg)) & 0x1F
		v := (n & 0xF) << 16 | ((n >> 4) & 1) << 7
		l := u32(op.lane)
		if enc == .VMOV_LANE_8 {
			// lane[2]:lane[1]:lane[0] -> bit21:bit6:bit5
			v |= ((l >> 2) & 1) << 21 | ((l >> 1) & 1) << 6 | (l & 1) << 5
		} else if enc == .VMOV_LANE_16 {
			// lane[1]:lane[0] -> bit21:bit6
			v |= ((l >> 1) & 1) << 21 | (l & 1) << 6
		} else {
			// lane[0] -> bit21
			v |= (l & 1) << 21
		}
		return v
	case .VFP_IMM8:
		// The operand normally carries the 32-bit float bit pattern, which is
		// reduced to the 8-bit abcdefgh form. If it is not representable,
		// treat the low byte as an already-encoded abcdefgh. Either way the
		// byte is split: high nibble at bits 19:16, low nibble at bits 3:0.
		// (Previously the fallback left the whole byte in bits 7:0, which is
		// not where the encoding keeps the high nibble.)
		imm8 := u32(op.immediate) & 0xFF
		if a, ok := encode_vfp_imm8_f32(u32(op.immediate)); ok {
			imm8 = u32(a)
		}
		return (imm8 >> 4) << 16 | imm8 & 0xF
	case .NEON_IMM8_ABCDEFGH:
		// Resolve the constant to a NEON modified-immediate form and lay the
		// abcdefgh byte out on the wire (bit 24, bits 18:16, bits 3:0). If
		// that fails, treat the low 8 bits as a raw abcdefgh.
		f, ok := encode_neon_modimm(u32(op.immediate))
		if !ok {
			v := u32(op.immediate) & 0xFF
			return ((v >> 7) & 1) << 24 |
			       ((v >> 4) & 0x7) << 16 |
			       (v & 0xF)
		}
		return pack_neon_modimm_field(f)
	case .NEON_CMODE:  return (u32(op.immediate) & 0xF) << 8
	case .NEON_OP_BIT: return (u32(op.immediate) & 1) << 5

	// ---- VFP/NEON register lists (LDM/STM/PUSH/POP for FP regs) ------------
	case .VFP_S_LIST, .VFP_D_LIST:
		return u32(op.immediate) & 0xFF

	// ---- Memory addressing composites --------------------------------------
	case .MEM_IMM12_OFFSET, .MEM_PRE_INDEX, .MEM_POST_INDEX:
		// All three share one layout: base at bits 19:16, U (add/subtract)
		// at bit 23, |disp| in bits 11:0. The form's fixed bits (P/W)
		// select offset vs. pre-index vs. post-index addressing.
		m := op.mem
		base := (u32(reg_hw(m.base)) & 0xF) << 16
		u_bit: u32 = m.disp >= 0 ? 1 : 0
		disp := u32(abs_i32(m.disp)) & 0xFFF
		return base | (u_bit << 23) | disp
	case .MEM_IMM8_OFFSET:
		// 8-bit magnitude split into bits 11:8 (high nibble) and 3:0.
		m := op.mem
		base := (u32(reg_hw(m.base)) & 0xF) << 16
		u_bit: u32 = m.disp >= 0 ? 1 : 0
		disp := u32(abs_i32(m.disp)) & 0xFF
		return base | (u_bit << 23) | ((disp >> 4) & 0xF) << 8 | (disp & 0xF)
	case .MEM_REG_OFFSET:
		// Base at 19:16, index register at 3:0, U from the operand's sign.
		m := op.mem
		base := (u32(reg_hw(m.base)) & 0xF) << 16
		rm := u32(reg_hw(m.index)) & 0xF
		u_bit: u32 = m.sign >= 0 ? 1 : 0
		return base | (u_bit << 23) | rm
	case .MEM_LITERAL:
		// PC-relative literal: the offset is patched in by the resolver.
		append(relocs, Relocation{
			offset = pc, label_id = u32(op.relative),
			type = .LDR_LITERAL_A32, size = 4, inst_idx = inst_idx,
		})
		return 0
	case .MEM_DOUBLEREG:
		m := op.mem
		return ((u32(reg_hw(m.base)) & 0xF) << 16) | (u32(reg_hw(m.index)) & 0xF)

	// ---- Coprocessor -------------------------------------------------------
	case .COPROC_NUM_FIELD:  return (u32(op.immediate) & 0xF) << 8
	case .COPROC_OPC1_FIELD: return (u32(op.immediate) & 0xF) << 20
	case .COPROC_OPC2_FIELD: return (u32(op.immediate) & 0x7) << 5
	case .COPROC_CRN_FIELD:  return (u32(reg_hw(op.reg)) & 0xF) << 16
	case .COPROC_CRM_FIELD:  return u32(reg_hw(op.reg)) & 0xF
	case .COPROC_OPC_MCRR:   return (u32(op.immediate) & 0xF) << 4

	// ---- Branch fields (all resolved through relocations) ------------------
	case .BRANCH_24_T32:
		append(relocs, Relocation{
			offset = pc, label_id = u32(op.relative),
			type = .BRANCH_T32_25, size = 4, inst_idx = inst_idx,
		})
		return 0
	case .BRANCH_20_T32:
		append(relocs, Relocation{
			offset = pc, label_id = u32(op.relative),
			type = .BRANCH_T32_21, size = 4, inst_idx = inst_idx,
		})
		return 0
	case .BRANCH_11_T16:
		append(relocs, Relocation{
			offset = pc, label_id = u32(op.relative),
			type = .BRANCH_T16_11, size = 2, inst_idx = inst_idx,
		})
		return 0
	case .BRANCH_8_T16:
		append(relocs, Relocation{
			offset = pc, label_id = u32(op.relative),
			type = .BRANCH_T16_8, size = 2, inst_idx = inst_idx,
		})
		return 0
	case .BRANCH_CBZ:
		append(relocs, Relocation{
			offset = pc, label_id = u32(op.relative),
			type = .BRANCH_T16_CBZ, size = 2, inst_idx = inst_idx,
		})
		return 0

	// ---- Misc --------------------------------------------------------------
	case .PSR_FIELD_MASK: return encode_psr_field(u8(op.immediate))
	case .SYSM_FIELD:     return u32(op.immediate) & 0xFF
	case .BARRIER_TYPE:   return u32(op.immediate) & 0xF
	case .IT_MASK:        return u32(op.immediate) & 0xFF
	case .CPS_IFLAGS:     return u32(op.immediate) & 0x1FF
	case .HINT_FIELD:     return u32(op.immediate) & 0xFF
	case .SAT_IMM5, .SAT_IMM5_T32:
		return (u32(op.immediate) & 0x1F) << 16
	case .BFI_MSB:        return (u32(op.immediate) & 0x1F) << 16
	case .BFI_LSB, .BFI_LSB_T32:
		return (u32(op.immediate) & 0x1F) << 7
	case .NEON_SHIFT_IMM6: return (u32(op.immediate) & 0x3F) << 16
	case .NEON_SHIFT_IMM3: return (u32(op.immediate) & 0x7) << 16

	// ---- MVE / CDE specifics (placeholders; bits per operand encoding) -----
	case .QD_MVE: return (u32(reg_hw(op.reg)) & 0x7) << 13
	case .QN_MVE: return ((u32(reg_hw(op.reg)) & 0x7) << 17) | ((u32(reg_hw(op.reg)) & 0x8) << 4)
	case .QM_MVE: return (u32(reg_hw(op.reg)) & 0x7) << 1
	case .MVE_SIZE_FIELD:     return (u32(op.immediate) & 0x3) << 20
	case .MVE_VPT_MASK_FIELD: return (u32(op.immediate) & 0xF) << 13
	case .MVE_LOOP_IMM:
		append(relocs, Relocation{
			offset = pc, label_id = u32(op.relative),
			type = .BRANCH_T32_WLS, size = 4, inst_idx = inst_idx,
		})
		return 0
	case .CDE_COPROC_FIELD: return (u32(op.immediate) & 0x7) << 8
	case .CDE_IMM_FIELD:    return u32(op.immediate) & 0x7F
	case .CDE_ACC_FIELD:    return (u32(op.immediate) & 1) << 16
	case .V8M_TT_AT_BITS:   return (u32(op.immediate) & 0x3) << 6
	}

	return 0
}
|
|
|
|
// Returns the magnitude of `v`: `-v` for negative input, `v` otherwise.
@(private="file")
abs_i32 :: #force_inline proc "contextless" (v: i32) -> i32 {
	if v < 0 {
		return -v
	}
	return v
}
|
|
|
|
// =============================================================================
|
|
// Pass 2 -- relocation resolver
|
|
// =============================================================================
|
|
|
|
// Tries to resolve one relocation by patching the branch / literal offset
// into the already-emitted instruction at `r.offset`.
//
// Inputs:
//   code         - emitted machine code, patched in place.
//   label_defs   - label table; entries are byte offsets at this point.
//   r            - the relocation to resolve.
//   base_address - accepted for interface symmetry; not consulted, since every
//                  relocation handled here is relative to the instruction.
//   errors       - receives LABEL_OUT_OF_RANGE when the displacement is
//                  misaligned or does not fit the instruction's field.
//
// Returns true when the relocation is finished with — either patched or
// rejected with an error — so the caller can drop it. Returns false when it
// must stay pending: unknown label id, undefined label, or a relocation type
// this resolver does not handle.
//
// The displacement is measured from the architectural PC value, which is the
// instruction address + 8 for A32 and + 4 for T16/T32 encodings.
@(private="file")
resolve_relocation_inline :: #force_inline proc(
	code:         []u8,
	label_defs:   []Label_Definition,
	r:            ^Relocation,
	base_address: u64,
	errors:       ^[dynamic]Error,
) -> bool {
	if int(r.label_id) >= len(label_defs) { return false }
	ld := label_defs[r.label_id]
	if ld == LABEL_UNDEFINED { return false }
	target := u32(ld)

	#partial switch r.type {
	case .BRANCH_A32_24:
		// Signed 24-bit word offset, PC = inst_addr + 8.
		rel := i32(target) - (i32(r.offset) + 8) + r.addend
		if rel & 3 != 0 || rel < -(1 << 25) || rel >= (1 << 25) {
			append(errors, Error{inst_idx = u32(r.inst_idx), code = .LABEL_OUT_OF_RANGE})
			return true
		}
		imm24 := u32(rel >> 2) & 0xFFFFFF
		word := read_u32_le(code, r.offset)
		word = (word & 0xFF000000) | imm24
		write_u32_le(code, r.offset, word)
		return true

	case .BRANCH_T32_25:
		// Signed 25-bit halfword-aligned offset, PC = inst_addr + 4.
		rel := i32(target) - (i32(r.offset) + 4) + r.addend
		if rel & 1 != 0 || rel < -(1 << 24) || rel >= (1 << 24) {
			append(errors, Error{inst_idx = u32(r.inst_idx), code = .LABEL_OUT_OF_RANGE})
			return true
		}
		// Offset bits are S:I1:I2:imm10:imm11. The instruction stores
		// J1 = NOT(I1) EOR S and J2 = NOT(I2) EOR S rather than I1/I2.
		v := u32(rel >> 1)
		s := (v >> 23) & 1
		j1 := ((v >> 22) & 1) ~ (s ~ 1)
		j2 := ((v >> 21) & 1) ~ (s ~ 1)
		imm10 := (v >> 11) & 0x3FF
		imm11 := v & 0x7FF
		// Keep the opcode bits the form supplied (first halfword bits 15:11;
		// second halfword bits 15:14 and 12) and replace only the offset
		// fields, so every form that uses this relocation keeps its own
		// opcode instead of being rewritten as a plain B.W.
		hi := read_u16_le(code, r.offset) & 0xF800
		lo := read_u16_le(code, r.offset + 2) & 0xD000
		hi |= u16(s << 10) | u16(imm10)
		lo |= u16(j1 << 13) | u16(j2 << 11) | u16(imm11)
		write_u16_le(code, r.offset, hi)
		write_u16_le(code, r.offset + 2, lo)
		return true

	case .BRANCH_T32_21:
		// T32 B<cond>: signed 21-bit offset, PC = inst_addr + 4.
		rel := i32(target) - (i32(r.offset) + 4) + r.addend
		if rel & 1 != 0 || rel < -(1 << 20) || rel >= (1 << 20) {
			append(errors, Error{inst_idx = u32(r.inst_idx), code = .LABEL_OUT_OF_RANGE})
			return true
		}
		// Offset bits are S:J2:J1:imm6:imm11 — J2 is the more significant of
		// the two J bits even though J1 sits at the higher instruction bit.
		v := u32(rel >> 1)
		s := (v >> 19) & 1
		j2 := (v >> 18) & 1
		j1 := (v >> 17) & 1
		imm6 := (v >> 11) & 0x3F
		imm11 := v & 0x7FF
		// Preserve what the form supplied (opcode bits and the condition in
		// first-halfword bits 9:6), clear the offset fields, then set them.
		// Clearing first keeps the patch correct if it is ever re-applied.
		hi := (read_u16_le(code, r.offset) & 0xFBC0) | u16(0xF000)
		lo := (read_u16_le(code, r.offset + 2) & 0xD000) | u16(0x8000)
		hi |= u16(s << 10) | u16(imm6)
		lo |= u16(j1 << 13) | u16(j2 << 11) | u16(imm11)
		write_u16_le(code, r.offset, hi)
		write_u16_le(code, r.offset + 2, lo)
		return true

	case .BRANCH_T16_11:
		// T16 unconditional branch: signed 12-bit offset, imm11 in bits 10:0.
		rel := i32(target) - (i32(r.offset) + 4) + r.addend
		if rel & 1 != 0 || rel < -(1 << 11) || rel >= (1 << 11) {
			append(errors, Error{inst_idx = u32(r.inst_idx), code = .LABEL_OUT_OF_RANGE})
			return true
		}
		imm11 := u16(u32(rel >> 1) & 0x7FF)
		word := read_u16_le(code, r.offset)
		word = (word & 0xF800) | imm11
		write_u16_le(code, r.offset, word)
		return true

	case .BRANCH_T16_8:
		// T16 conditional branch: signed 9-bit offset, imm8 in bits 7:0.
		rel := i32(target) - (i32(r.offset) + 4) + r.addend
		if rel & 1 != 0 || rel < -256 || rel >= 256 {
			append(errors, Error{inst_idx = u32(r.inst_idx), code = .LABEL_OUT_OF_RANGE})
			return true
		}
		imm8 := u16(u32(rel >> 1) & 0xFF)
		word := read_u16_le(code, r.offset)
		word = (word & 0xFF00) | imm8
		write_u16_le(code, r.offset, word)
		return true

	case .BRANCH_T16_CBZ:
		// CBZ/CBNZ: forward-only, unsigned i:imm5:'0' (0..126 bytes);
		// i goes to bit 9 and imm5 to bits 7:3.
		rel := i32(target) - (i32(r.offset) + 4) + r.addend
		if rel < 0 || rel & 1 != 0 || rel >= (1 << 7) {
			append(errors, Error{inst_idx = u32(r.inst_idx), code = .LABEL_OUT_OF_RANGE})
			return true
		}
		v := u32(rel >> 1)
		i_bit := (v >> 5) & 1
		imm5 := v & 0x1F
		word := read_u16_le(code, r.offset)
		word = (word & 0xFD07) | u16(i_bit << 9) | u16(imm5 << 3)
		write_u16_le(code, r.offset, word)
		return true

	case .BRANCH_T32_WLS, .BRANCH_T32_LE:
		// ARMv8.1-M low-overhead loop branches; signed 11-bit << 1.
		rel := i32(target) - (i32(r.offset) + 4) + r.addend
		if rel & 1 != 0 || rel < -(1 << 11) || rel >= (1 << 11) {
			append(errors, Error{inst_idx = u32(r.inst_idx), code = .LABEL_OUT_OF_RANGE})
			return true
		}
		// The 11-bit value is OR-ed into the second halfword, shifted left
		// by one bit.
		imm11 := u16(u32(rel >> 1) & 0x7FF)
		existing := read_u16_le(code, r.offset + 2)
		write_u16_le(code, r.offset + 2, existing | (imm11 << 1))
		return true

	case .LDR_LITERAL_A32:
		// A32 literal load: U bit (23) gives the direction, bits 11:0 the
		// byte magnitude, PC = inst_addr + 8.
		rel := i32(target) - (i32(r.offset) + 8) + r.addend
		u_bit: u32 = rel >= 0 ? 1 : 0
		magnitude := u32(rel < 0 ? -rel : rel)
		if magnitude >= 4096 {
			append(errors, Error{inst_idx = u32(r.inst_idx), code = .LABEL_OUT_OF_RANGE})
			return true
		}
		word := read_u32_le(code, r.offset)
		word = (word & 0xFF7FF000) | (u_bit << 23) | magnitude
		write_u32_le(code, r.offset, word)
		return true

	case:
		// Relocation types not handled here stay pending.
		return false
	}
}
|
|
|
|
// =============================================================================
|
|
// Halfword/word I/O
|
|
// =============================================================================
|
|
|
|
// Stores `word` into `code[offset..offset+3]`, least-significant byte first.
@(private="package")
write_u32_le :: #force_inline proc "contextless" (code: []u8, offset, word: u32) {
	for i in u32(0)..<4 {
		code[offset + i] = u8(word >> (8 * i))
	}
}
|
|
|
|
// Loads a little-endian 32-bit value from `code[offset..offset+3]`.
@(private="package")
read_u32_le :: #force_inline proc "contextless" (code: []u8, offset: u32) -> u32 {
	b0 := u32(code[offset + 0])
	b1 := u32(code[offset + 1])
	b2 := u32(code[offset + 2])
	b3 := u32(code[offset + 3])
	return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)
}
|
|
|
|
// Stores `word` into `code[offset..offset+1]`, low byte first.
@(private="package")
write_u16_le :: #force_inline proc "contextless" (code: []u8, offset: u32, word: u16) {
	low := u8(word)
	high := u8(word >> 8)
	code[offset + 0] = low
	code[offset + 1] = high
}
|
|
|
|
// Loads a little-endian 16-bit value from `code[offset..offset+1]`.
@(private="package")
read_u16_le :: #force_inline proc "contextless" (code: []u8, offset: u32) -> u16 {
	low := u16(code[offset + 0])
	high := u16(code[offset + 1])
	return low | (high << 8)
}
|