Files
Odin/core/rexcode/isa/mips/decoder.odin
Brendan Punsky fae15847a3 rexcode: buffer-sizing helpers across all ISAs + naming-contract doc
Roll the encode/decode buffer-sizing helpers (added for x86 in 49787b7de) out
to every other ISA, and document them in the cross-arch naming contract.

Per arch (arm32, arm64, mips, riscv, ppc, ppc_vle, rsp, mos6502, mos65816):
  - encode_max_code_size / encode_max_relocation_count now key off the
    []Instruction slice (were int counts); bodies unchanged (* MAX_INST_SIZE).
  - encode_reserve(code, relocs, instructions): grows the caller's code []u8 by
    length and reserves relocs by capacity; allocates no new buffers.
  - decode_max_instruction_count / decode_estimate_instruction_count: exact
    ceiling and typical estimate, keyed off the min/avg instruction size per
    arch (fixed-4: arm64/mips/ppc/rsp; min-2: arm32/riscv/ppc_vle; min-1: mos).
  - decode_reserve(instructions, inst_info, label_defs, data, exact=false).

docs/cross_arch_design.md: helpers added to the naming contract.

No behavior change to the existing size helpers (signature only). All 10 ISAs
check + test green (x86 2282, arm32 600, arm64 461, mips 281, riscv 154, ppc 31,
ppc_vle 281, rsp 70, mos6502 148, mos65816 53).
2026-06-19 04:11:30 -04:00

457 lines
17 KiB
Odin

// rexcode · Brendan Punsky (dotbmp@github), original author
package rexcode_mips
import "core:rexcode/isa"
// =============================================================================
// MIPS DECODER
// =============================================================================
//
// Fixed-width 4-byte decoding pipeline. Two passes (parallel to x86):
//
// PASS 1 - read each instruction word in the given endianness, dispatch
// via the generated tables (DECODE_INDEX_PRIMARY plus the five
// sub-tables in tables.odin), and emit one Instruction
// + one Instruction_Info. Branch/jump operands are emitted as
// RELATIVE-kind operands carrying the *absolute* target byte
// offset within the decoded region.
//
// PASS 2 - call isa.infer_labels_from_branches to materialise label
// definitions at every in-range branch target, reusing IDs from
// `relocs` when available so symbolic names survive the round
// trip with the encoder.
//
// Performance: the table dispatch is O(1) primary lookup -> O(1) sub-bucket
// (where applicable) -> linear scan within a bucket that holds at most ~3
// entries for normal opcodes and ~37 for COP1 single-precision (the
// densest cell). Each candidate check is `(word & mask) == bits`, two
// dependent ALU ops; modern cores retire the comparison in <2 cycles.
//
// Style mirrors `encoder.odin`: hot inner procs are `#force_inline`, the
// per-instruction body collapses to one straight-line block.
// -----------------------------------------------------------------------------
// Per-decoded-instruction metadata (parallel to []Instruction).
// -----------------------------------------------------------------------------
//
// `offset` -- byte offset within `data` where this instruction starts.
// `decode_entry` -- index into DECODE_ENTRIES of the matched form; lets a
// printer query the Feature tag / flags without re-scanning.
// One record per decoded instruction; decode() appends it alongside the
// matching Instruction so both arrays grow in lockstep.
Instruction_Info :: struct {
// Byte offset of the instruction within the decoded `data`.
offset: u32,
// Index into DECODE_ENTRIES of the matched form. decode() leaves this at
// zero for words that failed to decode (only `offset` is set on that path).
decode_entry: u16,
// Unnamed u16 that rounds the struct up to the 8 bytes asserted below.
_: u16,
}
// Compile-time guard: fails the build if the struct stops being 8 bytes.
#assert(size_of(Instruction_Info) == 8)
// =============================================================================
// decode()
// =============================================================================
// Decodes every whole 32-bit word in `data`.
//
// Inputs:
//   data         -- raw instruction bytes; a trailing partial word is ignored
//   relocs       -- forwarded unchanged to isa.infer_labels_from_branches
//   endianness   -- byte order used to read each word (defaults to .BIG)
//
// Outputs (all appended to, never cleared):
//   instructions -- one Instruction per word (mnemonic .INVALID on failure)
//   inst_info    -- one Instruction_Info per word, parallel to `instructions`
//   label_defs   -- filled in by isa.infer_labels_from_branches
//   errors       -- one INVALID_OPCODE entry per word that matched no form
//
// Returns the number of bytes consumed (a multiple of 4) and ok == true when
// this call appended no errors.
decode :: proc(
	data: []u8,
	relocs: []Relocation,
	instructions: ^[dynamic]Instruction,
	inst_info: ^[dynamic]Instruction_Info,
	label_defs: ^[dynamic]Label_Definition,
	errors: ^[dynamic]Error,
	endianness: Endianness = .BIG,
) -> (byte_count: u32, ok: bool) {
	// Round the length down to a whole number of words.
	limit := u32(len(data)) &~ 3
	error_mark := u32(len(errors))

	// Branch/jump targets gathered in pass 1 and consumed by pass 2.
	branch_targets: [dynamic]isa.Branch_Target
	defer delete(branch_targets)

	// ---- PASS 1: one word per iteration ----------------------------------
	for ; byte_count < limit; byte_count += 4 {
		word := read_u32(data, byte_count, endianness)

		inst: Instruction
		info: Instruction_Info

		if decode_one_inline(word, byte_count, &inst, &info) >= 0 {
			// Index this instruction is about to occupy in `instructions`.
			owner := u32(len(instructions))
			for slot in 0..<inst.operand_count {
				operand := &inst.ops[slot]
				if operand.kind != .RELATIVE || operand.relative < 0 {
					continue
				}
				append(&branch_targets, isa.Branch_Target{
					inst_idx = owner,
					op_idx   = slot,
					target   = u32(operand.relative),
				})
			}
		} else {
			// NOTE(review): inst_idx is given the byte offset here, while the
			// branch records above use the instruction index -- confirm that
			// this is the intended meaning for decode errors.
			append(errors, Error{inst_idx = byte_count, code = .INVALID_OPCODE})
			inst = Instruction{mnemonic = .INVALID, length = 4}
			info = Instruction_Info{offset = byte_count}
		}

		append(instructions, inst)
		append(inst_info, info)
	}

	// ---- PASS 2: label inference ------------------------------------------
	isa.infer_labels_from_branches(branch_targets[:], byte_count, label_defs, relocs)

	ok = u32(len(errors)) == error_mark
	return
}
// =============================================================================
// Internal: decode one 32-bit word into Instruction + Instruction_Info
// =============================================================================
//
// Returns the matched DECODE_ENTRIES index on success, or -1 if no encoding
// form matches (caller emits INVALID_OPCODE).
@(private="file")
decode_one_inline :: #force_inline proc "contextless" (
	word: u32, pc: u32, inst: ^Instruction, info: ^Instruction_Info,
) -> int {
	// Choose the candidate bucket: five primary opcodes are sub-indexed by a
	// second field of the word, everything else goes through the primary table.
	opcode := (word >> 26) & 0x3F
	bucket: Decode_Index
	switch opcode {
	case 0x00: bucket = DECODE_INDEX_SPECIAL [word & 0x3F]
	case 0x01: bucket = DECODE_INDEX_REGIMM  [(word >> 16) & 0x1F]
	case 0x11: bucket = DECODE_INDEX_COP1    [(word >> 21) & 0x1F]
	case 0x1C: bucket = DECODE_INDEX_SPECIAL2[word & 0x3F]
	case 0x1F: bucket = DECODE_INDEX_SPECIAL3[word & 0x3F]
	case:      bucket = DECODE_INDEX_PRIMARY [opcode]
	}

	// First entry whose mask/bits pattern matches wins; an empty bucket simply
	// skips the loop and falls into the "no match" return.
	first := int(bucket.start)
	limit := first + int(bucket.count)
	hit := -1
	for candidate in first..<limit {
		form := &DECODE_ENTRIES[candidate]
		if (word & form.mask) == form.bits {
			hit = candidate
			break
		}
	}
	if hit < 0 {
		return -1
	}

	entry := &DECODE_ENTRIES[hit]
	inst.length   = 4
	inst.flags    = {}
	inst.mnemonic = entry.mnemonic

	// R6 POP26/POP27 compact branches share opcodes 22/23 and differ only in
	// how rs and rt relate, so the table match is refined here:
	// rs == 0 -> BLEZC/BGTZC, rs == rt -> BGEZC/BLTZC, otherwise BGEC/BLTC.
	#partial switch entry.mnemonic {
	case .BGEC, .BLEZC, .BGEZC:
		rs := (word >> 21) & 0x1F
		rt := (word >> 16) & 0x1F
		if rs == 0 {
			inst.mnemonic = .BLEZC
		} else if rs == rt {
			inst.mnemonic = .BGEZC
		} else {
			inst.mnemonic = .BGEC
		}
	case .BLTC, .BGTZC, .BLTZC:
		rs := (word >> 21) & 0x1F
		rt := (word >> 16) & 0x1F
		if rs == 0 {
			inst.mnemonic = .BGTZC
		} else if rs == rt {
			inst.mnemonic = .BLTZC
		} else {
			inst.mnemonic = .BLTC
		}
	}

	// Fill operand slots 0..3 in order, stopping at the first .NONE slot.
	filled: u8 = 0
	for slot in 0..<4 {
		if entry.ops[slot] == .NONE {
			break
		}
		inst.ops[slot] = extract_operand_inline(word, pc, entry.ops[slot], entry.enc[slot])
		filled += 1
	}
	inst.operand_count = filled

	info.offset       = pc
	info.decode_entry = u16(hit)
	return hit
}
// -----------------------------------------------------------------------------
// Operand extractor -- inverse of pack_operand_inline in encoder.odin.
// -----------------------------------------------------------------------------
// Decodes one operand field out of `word`.
//
//   word -- the 32-bit instruction word
//   pc   -- byte offset of the instruction; only used to form the targets of
//           RELATIVE-kind operands (J-type jumps and PC-relative forms)
//   ot   -- operand type; picks the register class and size for register
//           slots, signedness for IMM_16, and jump-vs-plain for IMM_26
//   en   -- encoding slot; picks which bit field of `word` is read
//
// Returns a zero Operand for .NONE and .IMPL, and from the trailing
// `return {}` if no case returns.
@(private="file")
extract_operand_inline :: #force_inline proc "contextless" (
word: u32, pc: u32, ot: Operand_Type, en: Operand_Encoding,
) -> Operand {
switch en {
case .NONE:
return {}
// Integer / typed register slots ----------------------------------------
case .RS:
return reg_operand(decode_reg(word, 21, ot), ot)
case .RT:
return reg_operand(decode_reg(word, 16, ot), ot)
case .RD:
return reg_operand(decode_reg(word, 11, ot), ot)
case .SHAMT:
return Operand{immediate = i64((word >> 6) & 0x1F), kind = .IMMEDIATE, size = 1}
// FPU register slots ----------------------------------------------------
case .FT:
return reg_operand(decode_reg(word, 16, ot), ot)
case .FS:
return reg_operand(decode_reg(word, 11, ot), ot)
case .FD:
return reg_operand(decode_reg(word, 6, ot), ot)
case .FR:
return reg_operand(decode_reg(word, 21, ot), ot)
case .GPR_AT_6:
return reg_operand(decode_reg(word, 6, ot), ot)
case .GPR_AT_11:
return reg_operand(decode_reg(word, 11, ot), ot)
case .DSP_SA:
return Operand{immediate = i64((word >> 21) & 0xF), kind = .IMMEDIATE, size = 1}
// RS_RT reads only the 5-bit field at bits 20..16.
case .RS_RT:
return reg_operand(decode_reg(word, 16, ot), ot)
case .AC_NUM:
return Operand{immediate = i64((word >> 11) & 0x3), kind = .IMMEDIATE, size = 1}
// 6-bit field at bits 25..20, sign-extended from its top bit.
case .SHILO_IMM:
v := i32((word >> 20) & 0x3F)
if v & 0x20 != 0 { v |= ~i32(0x3F) }
return Operand{immediate = i64(v), kind = .IMMEDIATE, size = 1}
case .EXT_SIZE:
return Operand{immediate = i64((word >> 21) & 0x1F), kind = .IMMEDIATE, size = 1}
// Immediates ------------------------------------------------------------
case .IMM_16:
imm: i64
if ot == .IMM16S {
imm = i64(i16(word & 0xFFFF)) // sign-extend
} else {
imm = i64(word & 0xFFFF)
}
return Operand{immediate = imm, kind = .IMMEDIATE, size = 2}
case .IMM_5:
return Operand{immediate = i64((word >> 6) & 0x1F), kind = .IMMEDIATE, size = 1}
case .IMM_20:
return Operand{immediate = i64((word >> 6) & 0xFFFFF), kind = .IMMEDIATE, size = 4}
case .IMM_26:
if ot == .REL_J26 {
// J-type: target_addr = ((PC+4) & 0xF0000000) | (field << 2).
// The high 4 bits come from PC; we don't have base_address
// at decode time, so the target reflects the data buffer's
// own region (top 4 bits derived from `pc`).
field := word & 0x3FFFFFF
target := ((pc + 4) & 0xF0000000) | (field << 2)
return Operand{relative = i64(target), kind = .RELATIVE, size = 4}
}
return Operand{immediate = i64(word & 0x3FFFFFF), kind = .IMMEDIATE, size = 4}
// Memory: rs(base) + signed imm16(disp) --------------------------------
case .OFFSET_BASE:
base_hw := u16((word >> 21) & 0x1F)
disp := i32(i16(word & 0xFFFF)) // sign-extend
m := Memory{base = Register(REG_GPR | base_hw), disp = disp}
size: u8 = 4
return Operand{mem = m, kind = .MEMORY, size = size}
// PC-relative branches --------------------------------------------------
// In every BRANCH_* case the target is narrowed to u32 before being widened
// to i64, so a target that falls before offset 0 wraps to a large positive
// value instead of becoming negative.
case .BRANCH_16:
rel := i32(i16(word & 0xFFFF)) << 2
target := u32(i32(pc) + 4 + rel)
return Operand{relative = i64(target), kind = .RELATIVE, size = 4}
case .BRANCH_21:
rel21 := i32(word & 0x1FFFFF)
if rel21 & (1 << 20) != 0 { rel21 |= ~i32(0x1FFFFF) }
target := u32(i32(pc) + 4 + (rel21 << 2))
return Operand{relative = i64(target), kind = .RELATIVE, size = 4}
case .BRANCH_26:
rel26 := i32(word & 0x3FFFFFF)
if rel26 & (1 << 25) != 0 { rel26 |= ~i32(0x3FFFFFF) }
target := u32(i32(pc) + 4 + (rel26 << 2))
return Operand{relative = i64(target), kind = .RELATIVE, size = 4}
case .BRANCH_19:
// R6 PC-relative load: relative to this instruction (no +4), << 2.
rel19 := i32(word & 0x7FFFF)
if rel19 & (1 << 18) != 0 { rel19 |= ~i32(0x7FFFF) }
target := u32(i32(pc) + (rel19 << 2))
return Operand{relative = i64(target), kind = .RELATIVE, size = 4}
case .BRANCH_18:
// LDPC: relative to this instruction aligned down to 8, << 3.
rel18 := i32(word & 0x3FFFF)
if rel18 & (1 << 17) != 0 { rel18 |= ~i32(0x3FFFF) }
target := u32((i32(pc) &~ i32(7)) + (rel18 << 3))
return Operand{relative = i64(target), kind = .RELATIVE, size = 4}
// Misc small immediates -------------------------------------------------
case .FCC_BC:
return Operand{immediate = i64((word >> 18) & 0x7), kind = .IMMEDIATE, size = 1}
case .FCC_CC:
return Operand{immediate = i64((word >> 8) & 0x7), kind = .IMMEDIATE, size = 1}
case .SEL:
return Operand{immediate = i64(word & 0x7), kind = .IMMEDIATE, size = 1}
case .IMPL:
return {} // implicit operand -- bits already in static pattern
// GTE cofun sub-fields --------------------------------------------------
case .GTE_SF_BIT:
return Operand{immediate = i64((word >> 19) & 0x1), kind = .IMMEDIATE, size = 1}
case .GTE_MX_BITS:
return Operand{immediate = i64((word >> 17) & 0x3), kind = .IMMEDIATE, size = 1}
case .GTE_V_BITS:
return Operand{immediate = i64((word >> 15) & 0x3), kind = .IMMEDIATE, size = 1}
case .GTE_CV_BITS:
return Operand{immediate = i64((word >> 13) & 0x3), kind = .IMMEDIATE, size = 1}
case .GTE_LM_BIT:
return Operand{immediate = i64((word >> 10) & 0x1), kind = .IMMEDIATE, size = 1}
// VFPU register slots: 7-bit register index OR-ed with REG_VFPU.
case .VFPU_VD:
return Operand{reg = Register(REG_VFPU | u16(word & 0x7F)), kind = .REGISTER, size = 4}
case .VFPU_VS:
return Operand{reg = Register(REG_VFPU | u16((word >> 8) & 0x7F)), kind = .REGISTER, size = 4}
case .VFPU_VT:
return Operand{reg = Register(REG_VFPU | u16((word >> 16) & 0x7F)), kind = .REGISTER, size = 4}
case .VFPU_VT_MEM:
// Split field: bits 20..16 become the upper five bits of the index,
// bits 1..0 the lower two.
hw := ((word >> 16) & 0x1F) << 2 | (word & 0x3)
return Operand{reg = Register(REG_VFPU | u16(hw)), kind = .REGISTER, size = 4}
case .VFPU_OFFSET_BASE:
// The low two bits of the 16-bit field are masked off (0xFFFC), so the
// displacement is always a multiple of 4.
base := Register(REG_GPR | u16((word >> 21) & 0x1F))
disp := i32(word & 0xFFFC)
if disp & 0x8000 != 0 { disp |= ~i32(0xFFFF) } // sign-extend from bit 15
return Operand{mem = Memory{base = base, disp = disp}, kind = .MEMORY, size = 4}
case .VFPU_PFX:
return Operand{immediate = i64(word & 0xFFFFF), kind = .IMMEDIATE, size = 4}
case .VFPU_CONST:
return Operand{immediate = i64((word >> 16) & 0x1F), kind = .IMMEDIATE, size = 1}
case .VFPU_COND4:
return Operand{immediate = i64(word & 0xF), kind = .IMMEDIATE, size = 1}
case .VFPU_CC3:
return Operand{immediate = i64((word >> 18) & 0x7), kind = .IMMEDIATE, size = 1}
// MSA 3R-format register slots.
case .WD:
return Operand{reg = Register(REG_MSA | u16((word >> 6) & 0x1F)), kind = .REGISTER, size = 4}
case .WS:
return Operand{reg = Register(REG_MSA | u16((word >> 11) & 0x1F)), kind = .REGISTER, size = 4}
case .WT:
return Operand{reg = Register(REG_MSA | u16((word >> 16) & 0x1F)), kind = .REGISTER, size = 4}
// MSA immediates / displacements.
case .MSA_I5:
return Operand{immediate = i64((word >> 16) & 0x1F), kind = .IMMEDIATE, size = 1}
case .MSA_S10:
// Signed 10-bit field read from bits 25..16.
// NOTE(review): this is the same field position the MSA_OFFSET_BASE_*
// cases use for their displacement -- confirm it matches the encoder for
// every form that uses MSA_S10.
v := i32((word >> 16) & 0x3FF)
if v & 0x200 != 0 { v |= ~i32(0x3FF) }
return Operand{immediate = i64(v), kind = .IMMEDIATE, size = 2}
case .MSA_BIT5:
return Operand{immediate = i64((word >> 11) & 0x1F), kind = .IMMEDIATE, size = 1}
case .MSA_BIT_SHIFT:
// m at 22:16; df from the marker, shift = m - marker.
// The leading marker bits select how many low bits are kept: 3, 4, 5 or 6.
m := (word >> 16) & 0x7F
sh: u32
if m >= 0x70 { sh = m & 0x07 }
else if m >= 0x60 { sh = m & 0x0F }
else if m >= 0x40 { sh = m & 0x1F }
else { sh = m & 0x3F }
return Operand{immediate = i64(sh), kind = .IMMEDIATE, size = 1}
case .MSA_ELM_IDX:
// n at 21:16; df from the marker, index = n - marker.
// The leading marker bits select how many low bits are kept: 1, 2, 3 or 4.
n := (word >> 16) & 0x3F
idx: u32
if n >= 0x38 { idx = n & 0x01 }
else if n >= 0x30 { idx = n & 0x03 }
else if n >= 0x20 { idx = n & 0x07 }
else { idx = n & 0x0F }
return Operand{immediate = i64(idx), kind = .IMMEDIATE, size = 1}
case .MSA_I8:
return Operand{immediate = i64((word >> 16) & 0xFF), kind = .IMMEDIATE, size = 1}
case .MSA_OFFSET_BASE_B, .MSA_OFFSET_BASE_H, .MSA_OFFSET_BASE_W, .MSA_OFFSET_BASE_D:
// Base register at bits 15..11; signed 10-bit displacement at bits 25..16,
// scaled by shifting left 0/1/2/3 for the B/H/W/D variants.
shift: u32 = 0
#partial switch en {
case .MSA_OFFSET_BASE_H: shift = 1
case .MSA_OFFSET_BASE_W: shift = 2
case .MSA_OFFSET_BASE_D: shift = 3
}
base_hw := u8((word >> 11) & 0x1F)
v := i32((word >> 16) & 0x3FF)
if v & 0x200 != 0 { v |= ~i32(0x3FF) }
return Operand{
mem = Memory{
base = Register(REG_GPR | u16(base_hw)),
disp = v << shift,
},
kind = .MEMORY, size = 4,
}
}
return {}
}
// Builds a Register from the 5-bit field of `word` starting at bit `shift`.
// The register class is chosen from the operand type; any type not listed
// below yields a general-purpose register.
@(private="file")
decode_reg :: #force_inline proc "contextless" (word: u32, shift: u8, ot: Operand_Type) -> Register {
	index := u16((word >> shift) & 0x1F)

	#partial switch ot {
	case .FPR_S, .FPR_D, .FPR_W, .FPR_L, .FPR_PS:
		return Register(REG_FPR | index)
	case .FCR:
		return Register(REG_FCR | index)
	case .CP0_REG:
		return Register(REG_CP0 | index)
	case .CP2_REG:
		return Register(REG_CP2D | index)
	case .CP2_CTRL:
		return Register(REG_CP2C | index)
	case .VFPU_S, .VFPU_P, .VFPU_T, .VFPU_Q, .VFPU_M_P, .VFPU_M_T, .VFPU_M_Q:
		return Register(REG_VFPU | index)
	}

	return Register(REG_GPR | index)
}
// Wraps `r` in a REGISTER-kind Operand. The size is 8 for the FPR_D, FPR_L
// and FPR_PS operand types and 4 for every other type.
@(private="file")
reg_operand :: #force_inline proc "contextless" (r: Register, ot: Operand_Type) -> Operand {
	width: u8
	#partial switch ot {
	case .FPR_D, .FPR_L, .FPR_PS:
		width = 8
	case:
		width = 4
	}
	return Operand{reg = r, kind = .REGISTER, size = width}
}
// -----------------------------------------------------------------------------
// Buffer-Sizing Helpers (let callers pre-size so the decode hot path never
// reallocates; allocates no new buffers -- only the caller's arrays grow).
// -----------------------------------------------------------------------------
// Upper bound on the number of instructions `data` can decode to: one per
// whole 4-byte word, with any trailing partial word not counted.
@(require_results)
decode_max_instruction_count :: #force_inline proc "contextless" (data: []u8) -> int {
	INSTRUCTION_BYTES :: 4
	return len(data) / INSTRUCTION_BYTES
}
// Typical-case estimate of the instruction count for `data`: the number of
// whole 4-byte words plus a fixed slack of 8 (so it is always 8 above the
// value decode_max_instruction_count returns for the same input).
@(require_results)
decode_estimate_instruction_count :: #force_inline proc "contextless" (data: []u8) -> int {
	INSTRUCTION_BYTES :: 4
	SLACK             :: 8
	whole_words := len(data) / INSTRUCTION_BYTES
	return whole_words + SLACK
}
// Grows the capacity of the caller's decode output arrays ahead of decoding
// `data`. Each non-nil array is reserved for its current length plus the
// expected instruction count; a nil pointer skips that array.
//   exact = true  -> use decode_max_instruction_count(data)
//   exact = false -> use decode_estimate_instruction_count(data)
decode_reserve :: proc(
	instructions: ^[dynamic]Instruction,
	inst_info: ^[dynamic]Instruction_Info,
	label_defs: ^[dynamic]Label_Definition,
	data: []u8,
	exact: bool = false,
) {
	extra: int
	if exact {
		extra = decode_max_instruction_count(data)
	} else {
		extra = decode_estimate_instruction_count(data)
	}

	if instructions != nil {
		reserve(instructions, len(instructions) + extra)
	}
	if inst_info != nil {
		reserve(inst_info, len(inst_info) + extra)
	}
	if label_defs != nil {
		reserve(label_defs, len(label_defs) + extra)
	}
}