Files
Odin/core/rexcode/arm32/immediates.odin
Flāvius a4f08f8307 Load rexcode encode/decode tables from committed binary blobs
Each ISA's hand-written ENCODING_TABLE (the single source of truth) now lives
in a per-arch tablegen/ metaprogram that flattens it and serializes committed
binary blobs; the library #loads those into @(rodata) at compile time rather
than compiling a table body. No arch keeps encoding_table.odin or
decoding_tables.odin -- only a generated tables.odin loader and tables/*.bin.

* Two-stage, type-checked pipeline: tablegen Stage A emits human-readable
  generated Odin, which compiles and serializes the blobs in Stage B.
* encode() goes through encoding_forms(m); decoders are unchanged apart from
  x86's flattened 2-D index. Decode tables are byte-identical to the old ones.
* build.lua: a LuaJIT driver for the metaprograms, validations, and tests,
  with cross-platform gating and a clear report.
* Docs refreshed; the obsolete forward-looking plan in cross_arch_design.md
  trimmed to what was actually built.
* Attribution headers added to all rexcode source files; the generators emit
  them so generated files keep them.
2026-06-15 07:43:29 -04:00

520 lines
18 KiB
Odin

// rexcode · Brendan Punsky (dotbmp@github), original author
package rexcode_arm32
// =============================================================================
// AArch32 IMMEDIATE ENCODING ALGORITHMS
// =============================================================================
//
// ARM/Thumb immediates have several non-trivial wire formats:
//
// 1. A32 modified-immediate (imm12: rotate << 8 | value)
// effective = ROR(value, 2*rotate)
// Range: 8-bit value rotated by even amount 0..30.
//
// 2. T32 modified-immediate (i:imm3:imm8 split):
// - 4 replication patterns: 0x000000XY, 0x00XY00XY, 0xXY00XY00, 0xXYXYXYXY
// - rotation pattern: ROR(0x80|imm7, shift) where imm7 = imm8[6:0] and shift = i:imm3:imm8[7]
//
// 3. NEON modified-immediate (cmode:abcdefgh:op):
// 12 cmode patterns covering .I8/.I16/.I32/.I64/.F32 broadcast
// plus 16/32-bit shifted forms and trailing-ones forms.
//
// 4. VFP imm8 float (VMOV.F32 #imm / VMOV.F64 #imm):
// abcdefgh -> sign = a, exponent = NOT(b):b...b:cd (8 bits from 3),
// mantissa = efgh:0... (23 bits from 4).
// Only 256 distinct values representable, but covers common
// constants (1.0, 0.5, 2.0, 3.0, ...).
//
// Each algorithm provides:
// encode_<X>(value) -> (field, ok) // ok is false if value is not representable
// decode_<X>(field) -> value // always succeeds (every field decodes to a value)
// =============================================================================
// 1. A32 modified-immediate
// =============================================================================
//
// Encoded as 12-bit imm12 = (rotate << 8) | value8, where the effective
// constant is ROR(value8, 2*rotate). The encoder must find a rotation
// 0..15 such that the value rotates to fit in 8 bits.
// Rotate `v` right by `n` bit positions. Only the low five bits of `n` are
// used, so rotating by 32 (or any multiple of it) returns `v` unchanged.
@(require_results)
ror32 :: #force_inline proc "contextless" (v: u32, n: u32) -> u32 {
	amount := n & 31
	if amount != 0 {
		return (v << (32 - amount)) | (v >> amount)
	}
	// Nothing to rotate.
	return v
}
// Rotate `v` left by `n` bit positions. Only the low five bits of `n` are
// used, so rotating by 32 (or any multiple of it) returns `v` unchanged.
@(require_results)
rol32 :: #force_inline proc "contextless" (v: u32, n: u32) -> u32 {
	amount := n & 31
	if amount != 0 {
		return (v >> (32 - amount)) | (v << amount)
	}
	// Nothing to rotate.
	return v
}
// Encode a 32-bit constant as an A32 modified-immediate.
//
// Returns the 12-bit field (rotate << 8) | value8 and true when the constant
// can be written as ROR(value8, 2*rotate); returns 0 and false otherwise.
// The smallest usable rotate is chosen.
@(require_results)
encode_a32_modimm :: proc(value: u32) -> (u32, bool) {
	// rotate = 0: the constant already fits in the 8-bit value field.
	if value < 0x100 {
		return value, true
	}
	// For each non-zero rotate field, undo the ROR by rotating left by the
	// same (even) amount and see whether an 8-bit value remains.
	for rot := u32(1); rot < 16; rot += 1 {
		candidate := rol32(value, rot << 1)
		if candidate > 0xFF {
			continue
		}
		return candidate | (rot << 8), true
	}
	return 0, false
}
// Expand a 12-bit A32 modified-immediate field into its 32-bit constant:
// the low byte rotated right by twice the 4-bit rotate field (bits 11:8).
// Bits above bit 11 of `imm12` are ignored.
@(require_results)
decode_a32_modimm :: #force_inline proc "contextless" (imm12: u32) -> u32 {
	value8 := imm12 & 0xFF
	rot4 := (imm12 >> 8) & 0xF
	return ror32(value8, rot4 << 1)
}
// =============================================================================
// 2. Thumb-2 modified-immediate
// =============================================================================
//
// T32 packs the 12-bit modimm field across non-adjacent positions:
// bit 26 of the 32-bit word -> i
// bits 14:12 of word -> imm3
// bits 7:0 of word -> imm8
// Concatenated as (i:imm3:imm8) for a 12-bit value.
//
// The 12 bits then expand to a 32-bit constant via 5 cases on (i:imm3):
//
// i:imm3 = 0000 a -> 00000000_00000000_00000000_aaaaaaaa
// i:imm3 = 0001 a -> 00000000_aaaaaaaa_00000000_aaaaaaaa (a != 0)
// i:imm3 = 0010 a -> aaaaaaaa_00000000_aaaaaaaa_00000000 (a != 0)
// i:imm3 = 0011 a -> aaaaaaaa_aaaaaaaa_aaaaaaaa_aaaaaaaa (a != 0)
// i:imm3 >= 0100 -> ROR(0x80 | imm7, shift)
// where imm7 = imm8[6:0] and shift = i:imm3:imm8[7]
// (5-bit shift taken from the top of the field, 8..31)
// Assemble the 12-bit i:imm3:imm8 number from its three wire pieces
// (i from bit 26 of the T32 word, imm3 from bits 14:12, imm8 from bits 7:0).
// Each argument is masked to its field width before being placed.
@(private)
build_t32_field12 :: #force_inline proc "contextless" (i: u32, imm3: u32, imm8: u32) -> u32 {
	field := imm8 & 0xFF
	field |= (imm3 & 0x7) << 8
	field |= (i & 1) << 11
	return field
}
// Encode a 32-bit constant as a T32 modified-immediate.
//
// Returns the 12 bits packed as i:imm3:imm8 and true on success, or 0 and
// false when the constant matches none of the five T32 forms.
encode_t32_modimm :: proc(value: u32) -> (u32, bool) {
	// i:imm3 = 0000: plain 8-bit constant.
	if value < 0x100 {
		return value, true
	}

	lo := value & 0xFF
	hi := (value >> 8) & 0xFF

	// i:imm3 = 0001: 0x00XY00XY
	if value == ((lo << 16) | lo) {
		return build_t32_field12(0, 1, lo), true
	}
	// i:imm3 = 0010: 0xXY00XY00
	if value == ((hi << 24) | (hi << 8)) {
		return build_t32_field12(0, 2, hi), true
	}
	// i:imm3 = 0011: 0xXYXYXYXY
	if lo != 0 && value == lo * 0x01010101 {
		return build_t32_field12(0, 3, lo), true
	}

	// Rotated form: look for a shift in 8..31 whose inverse rotation leaves
	// a byte of the shape 1xxxxxxx (0x80..0xFF).
	for shift := u32(8); shift < 32; shift += 1 {
		unrotated := rol32(value, shift)
		if unrotated < 0x80 || unrotated > 0xFF {
			continue
		}
		// The 5-bit shift is spread over i : imm3 : imm8[7]; the implicit
		// leading 1 is dropped and the remaining seven bits fill imm8[6:0].
		low7 := unrotated & 0x7F
		top_i := (shift >> 4) & 1
		mid3 := (shift >> 1) & 0x7
		bit7 := (shift & 1) << 7
		return build_t32_field12(top_i, mid3, bit7 | low7), true
	}
	return 0, false
}
// Expand a 12-bit i:imm3:imm8 field into the 32-bit constant it denotes.
// Selectors 0..3 (bits 11:8) pick one of the byte-replication patterns;
// every other selector is the rotated form.
decode_t32_modimm :: proc "contextless" (field12: u32) -> u32 {
	low8 := field12 & 0xFF
	selector := (field12 >> 8) & 0xF // i:imm3

	if selector == 0 {
		return low8
	}
	if selector == 1 {
		return low8 | (low8 << 16)
	}
	if selector == 2 {
		return (low8 << 8) | (low8 << 24)
	}
	if selector == 3 {
		// Same byte in all four positions.
		return low8 * 0x01010101
	}

	// Rotated form: restore the implicit top bit and rotate right by the
	// 5-bit amount i:imm3:imm8[7].
	amount := (field12 >> 7) & 0x1F
	return ror32(0x80 | (low8 & 0x7F), amount)
}
// =============================================================================
// 3. NEON modified-immediate (VMOV/VMVN/VORR/VBIC immediate forms)
// =============================================================================
//
// Encoded as cmode (4 bits) + op (1 bit) + abcdefgh (8 bits).
// cmode selects one of 12 broadcast/shift patterns:
//
// cmode op pattern (.dt)
// ---------------------------------------
// 000x - .I32 imm32 = 0x000000XY
// 001x - .I32 imm32 = 0x0000XY00 (shifted 8 bits)
// 010x - .I32 imm32 = 0x00XY0000 (shifted 16)
// 011x - .I32 imm32 = 0xXY000000 (shifted 24)
// 100x - .I16 imm16 = 0x00XY
// 101x - .I16 imm16 = 0xXY00 (shifted 8)
// 1100 - .I32 imm32 = 0x0000XYFF (8 trailing ones)
// 1101 - .I32 imm32 = 0x00XYFFFF (16 trailing ones)
// 1110 0 .I8 imm32 = XYXYXYXY (byte-wise)
// 1110 1 .I64 imm32_high = a:b:c:d imm32_low = e:f:g:h (each bit expanded to a 0x00/0xFF byte)
// 1111 0 .F32 imm32 = a:NOT(b):bbbbb:cdefgh:0... (VFP imm8)
//
// Encoder packs the 8-bit abcdefgh into wire bits (a at bit 24, bcd at
// bits 18:16, efgh at bits 3:0), cmode at bits 11:8, op at bit 5.
// Result of selecting a NEON modified-immediate encoding: the requested
// constant together with the cmode/op/abcdefgh triple chosen for it.
NEON_Imm_Form :: struct {
raw_imm32: u32, // the 32-bit constant that was requested
cmode: u8, // selected cmode (0..15)
op: u8, // op bit (0 or 1)
abcdefgh: u8, // the 8-bit immediate payload
}
// Choose a NEON modified-immediate encoding for a 32-bit constant.
//
// Returns the selection as a NEON_Imm_Form (cmode, op, abcdefgh, plus the
// requested constant in raw_imm32); `ok` is false when no pattern fits.
// Patterns are tried in this order and the first match wins:
//
//   cmode 0000/0010/0100/0110  .I32  one byte at bit offset 0/8/16/24
//   cmode 1100/1101            .I32  one byte above 8/16 trailing one bits
//   cmode 1110, op = 0         .I8   the same byte in all four positions
//   cmode 1110, op = 1         .I64  every byte is 0x00 or 0xFF
//   cmode 1111                 .F32  VFP imm8 expansion
//
// The .I16 cmodes (1000/1010) are never selected here: for a 32-bit input
// their predicates are identical to those of cmode 0000/0010, which are
// tested first.
encode_neon_modimm :: proc(value: u32) -> (form: NEON_Imm_Form, ok: bool) {
	form = NEON_Imm_Form{ raw_imm32 = value }
	switch {
	// .I32 (cmode 0000): 0x000000XY
	case value <= 0xFF:
		form.cmode = 0b0000
		form.abcdefgh = u8(value)
		ok = true
		return
	// .I32 shifted 8 (cmode 0010): 0x0000XY00
	case (value & ~u32(0xFF00)) == 0:
		form.cmode = 0b0010
		form.abcdefgh = u8(value >> 8)
		ok = true
		return
	// .I32 shifted 16 (cmode 0100): 0x00XY0000
	case (value & ~u32(0xFF0000)) == 0:
		form.cmode = 0b0100
		form.abcdefgh = u8(value >> 16)
		ok = true
		return
	// .I32 shifted 24 (cmode 0110): 0xXY000000
	case (value & ~u32(0xFF000000)) == 0:
		form.cmode = 0b0110
		form.abcdefgh = u8(value >> 24)
		ok = true
		return
	// .I32 trailing-ones-8 (cmode 1100): 0x0000_XYFF
	case (value & 0xFFFF0000) == 0 && (value & 0xFF) == 0xFF:
		form.cmode = 0b1100
		form.abcdefgh = u8((value >> 8) & 0xFF)
		ok = true
		return
	// .I32 trailing-ones-16 (cmode 1101): 0x00XY_FFFF
	case (value & 0xFF000000) == 0 && (value & 0xFFFF) == 0xFFFF:
		form.cmode = 0b1101
		form.abcdefgh = u8((value >> 16) & 0xFF)
		ok = true
		return
	}

	// .I8 byte broadcast (cmode 1110, op=0): XYXYXYXY
	if a := u32(value & 0xFF); value == (a | (a << 8) | (a << 16) | (a << 24)) {
		form.cmode = 0b1110
		form.op = 0
		form.abcdefgh = u8(a)
		ok = true
		return
	}

	// .I64 bit-expanded (cmode 1110, op=1): each payload bit stands for a
	// whole 0x00 or 0xFF byte, so every byte of `value` must be one of those.
	{
		all_match := true
		bits_packed: u32
		for k in u32(0)..<4 {
			byte_v := (value >> (k * 8)) & 0xFF
			if byte_v == 0x00 {
				// contributes a 0 bit
			} else if byte_v == 0xFF {
				bits_packed |= 1 << k
			} else {
				all_match = false
				break
			}
		}
		if all_match {
			form.cmode = 0b1110
			form.op = 1
			form.abcdefgh = u8(bits_packed & 0xFF)
			// Only one 32-bit half is examined here, so only payload bits
			// 3:0 are produced. The other half of the full 64-bit constant
			// must follow the same rule; the caller is responsible for that.
			ok = true
			return
		}
	}

	// .F32 (cmode 1111): the constant must be a VFP imm8 float expansion.
	// `or_return` leaves `ok` false when it is not.
	a := encode_vfp_imm8_f32(value) or_return
	form.cmode = 0b1111
	form.abcdefgh = a
	ok = true
	return
}
// Expand abcdefgh + cmode + op back into a 32-bit constant.
//
// For cmode 000x..101x the low cmode bit does not affect the expanded value
// (ARM ARM AdvSIMDExpandImm switches on cmode<3:1>); it only distinguishes
// the VMOV/VMVN forms from VORR/VBIC. Both values of that bit are therefore
// decoded identically. For cmode 1110 with op = 1 only the low 32-bit half
// (payload bits 3:0) is produced. A cmode outside 0..15 decodes to 0.
decode_neon_modimm :: proc "contextless" (abcdefgh: u32, cmode: u32, op: u32) -> u32 {
	a := abcdefgh & 0xFF
	switch cmode {
	case 0b0000, 0b0001: return a
	case 0b0010, 0b0011: return a << 8
	case 0b0100, 0b0101: return a << 16
	case 0b0110, 0b0111: return a << 24
	case 0b1000, 0b1001: return a
	case 0b1010, 0b1011: return a << 8
	case 0b1100: return (a << 8) | 0xFF
	case 0b1101: return (a << 16) | 0xFFFF
	case 0b1110:
		if op == 0 {
			// .I8: the byte replicated into every position.
			return a | (a << 8) | (a << 16) | (a << 24)
		}
		// .I64 bit-expand: each bit -> 0x00 or 0xFF byte
		result: u32 = 0
		for k in u32(0)..<4 {
			if (a >> k) & 1 != 0 {
				result |= 0xFF << (k * 8)
			}
		}
		return result
	case 0b1111: return decode_vfp_imm8_f32(a)
	}
	return 0
}
// Lay out a NEON_Imm_Form as the bits the encoder ORs into the instruction
// word. abcdefgh is split three ways:
//   a    -> bit 24
//   bcd  -> bits 18:16
//   efgh -> bits 3:0
// cmode is shifted to bit 8 and op to bit 5; neither is masked here.
pack_neon_modimm_field :: #force_inline proc "contextless" (f: NEON_Imm_Form) -> u32 {
	imm := u32(f.abcdefgh)
	word := imm & 0xF                 // efgh
	word |= ((imm >> 4) & 0x7) << 16  // bcd
	word |= ((imm >> 7) & 1) << 24    // a
	word |= u32(f.op) << 5
	word |= u32(f.cmode) << 8
	return word
}
// Gather abcdefgh back out of an instruction word: bit 24 supplies a,
// bits 18:16 supply bcd and bits 3:0 supply efgh. All other bits are ignored.
extract_neon_modimm_abcdefgh :: #force_inline proc "contextless" (word: u32) -> u32 {
	top := (word >> 24) & 1
	mid := (word >> 16) & 0x7
	low := word & 0xF
	return (top << 7) | (mid << 4) | low
}
// =============================================================================
// 4. VFP imm8 float (VMOV.F32 / VMOV.F64 immediate)
// =============================================================================
//
// The 8-bit field abcdefgh expands to a float as follows (ARM ARM
// VFPExpandImm): the sign is a, the exponent is NOT(b), then b replicated,
// then cd, and the mantissa is efgh followed by zeros.
//
// F32: sign[1]:exp[8]:mant[23]
// sign = a
// exp = NOT(b):b:b:b:b:b:c:d (8 bits)
// mant = e:f:g:h:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0:0 (23 bits)
// F32 = a:NOT(b):bbbbb:cdefgh:0000000000000000000 (1 + 1 + 5 + 6 + 19 = 32)
//
// F64: sign[1]:exp[11]:mant[52]
// F64 = a:NOT(b):bbbbbbbb:cdefgh:0...0 (1 + 1 + 8 + 6 + 48 = 64)
//
// F16: sign[1]:exp[5]:mant[10]
// F16 = a:NOT(b):bb:cdefgh:000000 (1 + 1 + 2 + 6 + 6 = 16)
//
// The set of representable F32 values is exactly 256: every encoded form
// is a sign bit, a 3-bit exponent selector (b:cd) and a 4-bit mantissa
// (efgh). Common constants like 0.5, 1.0, 1.5, 2.0, 3.0, 0.25 are
// all encodable.
// Compress an F32 bit pattern into the VFP imm8 field abcdefgh.
//
// The pattern must have the shape a:NOT(b):bbbbb:cdefgh:0(19 zeros).
// Returns the 8-bit field and true when it does, 0 and false otherwise.
encode_vfp_imm8_f32 :: proc(value: u32) -> (u8, bool) {
	// Bits 18:0 carry no information in this format and must be clear.
	if (value & 0x7FFFF) != 0 {
		return 0, false
	}

	// Bits 30:25 hold NOT(b):bbbbb, so only two patterns are legal.
	b: u32
	switch (value >> 25) & 0x3F {
	case 0b011111:
		b = 1
	case 0b100000:
		b = 0
	case:
		return 0, false
	}

	sign := value >> 31
	frac := (value >> 19) & 0x3F // cdefgh
	return u8((sign << 7) | (b << 6) | frac), true
}
// Expand the VFP imm8 field abcdefgh into an F32 bit pattern of the shape
// a:NOT(b):bbbbb:cdefgh:0(19 zeros). Bits above bit 7 of the input are ignored.
decode_vfp_imm8_f32 :: proc "contextless" (abcdefgh: u32) -> u32 {
	sign := (abcdefgh >> 7) & 1
	frac := abcdefgh & 0x3F // cdefgh

	// NOT(b):bbbbb is 0b011111 when b is set and 0b100000 when it is clear.
	exp_hi: u32 = 0b100000
	if (abcdefgh >> 6) & 1 != 0 {
		exp_hi = 0b011111
	}
	return (sign << 31) | (exp_hi << 25) | (frac << 19)
}
// Compress an F64 bit pattern into the VFP imm8 field abcdefgh.
//
// The pattern must have the shape a:NOT(b):bbbbbbbb:cdefgh:0(48 zeros).
// Returns the 8-bit field and true when it does, 0 and false otherwise.
encode_vfp_imm8_f64 :: proc(value: u64) -> (u8, bool) {
	// Bits 47:0 must be clear.
	if (value << 16) != 0 {
		return 0, false
	}

	// Bits 62:54 hold NOT(b):bbbbbbbb, so only two patterns are legal.
	b: u32
	switch u32(value >> 54) & 0x1FF {
	case 0x0FF:
		b = 1
	case 0x100:
		b = 0
	case:
		return 0, false
	}

	sign := u32(value >> 63)
	frac := u32(value >> 48) & 0x3F // cdefgh
	return u8((sign << 7) | (b << 6) | frac), true
}
// Expand the VFP imm8 field abcdefgh into an F64 bit pattern of the shape
// a:NOT(b):bbbbbbbb:cdefgh:0(48 zeros). Bits above bit 7 of the input are ignored.
decode_vfp_imm8_f64 :: proc "contextless" (abcdefgh: u32) -> u64 {
	sign := u64((abcdefgh >> 7) & 1)
	frac := u64(abcdefgh & 0x3F) // cdefgh

	// NOT(b):bbbbbbbb is 0x0FF when b is set and 0x100 when it is clear.
	exp_hi: u64 = 0x100
	if (abcdefgh >> 6) & 1 != 0 {
		exp_hi = 0x0FF
	}
	return (sign << 63) | (exp_hi << 54) | (frac << 48)
}
// Compress an F16 bit pattern into the VFP imm8 field abcdefgh.
//
// Per the ARM ARM VFPExpandImm pseudocode with a 5-bit exponent and 10-bit
// mantissa, the half-precision expansion is
//
//   F16 = a : NOT(b) : bb : cdefgh : 000000     (1 + 1 + 2 + 6 + 6 = 16)
//
// i.e. sign = a, exp = NOT(b):b:b:c:d, mant = e:f:g:h:000000.
// Returns the 8-bit field and true when `value` has that shape, 0 and
// false otherwise.
encode_vfp_imm8_f16 :: proc(value: u16) -> (u8, bool) {
	v := u32(value)
	// The low six mantissa bits are always zero in this format.
	if (v & 0x3F) != 0 { return 0, false }
	sign := (v >> 15) & 1
	bit14 := (v >> 14) & 1
	bits_13_12 := (v >> 12) & 0x3
	bit_b := bit14 ~ 1 // b = NOT(bit 14)
	// Bits 13:12 are two copies of b.
	expected := bit_b == 1 ? u32(0x3) : u32(0)
	if bits_13_12 != expected { return 0, false }
	// Bits 11:6 carry cdefgh unchanged (cd end the exponent, efgh start
	// the mantissa).
	cdefgh := (v >> 6) & 0x3F
	abcdefgh := (sign << 7) | (bit_b << 6) | cdefgh
	return u8(abcdefgh), true
}
// Expand the VFP imm8 field abcdefgh into an F16 bit pattern of the shape
// a:NOT(b):bb:cdefgh:000000. Bits above bit 7 of the input are ignored.
decode_vfp_imm8_f16 :: proc "contextless" (abcdefgh: u32) -> u16 {
	a := (abcdefgh >> 7) & 1
	b := (abcdefgh >> 6) & 1
	cdefgh := abcdefgh & 0x3F
	not_b := b ~ 1
	bb: u32 = b == 1 ? 0x3 : 0
	v := (a << 15) | (not_b << 14) | (bb << 12) | (cdefgh << 6)
	return u16(v)
}
// =============================================================================
// 5. PSR field selector (MSR <psrfield>_<bits>, ...)
// =============================================================================
//
// MSR takes a 4-bit fields mask in instruction bits 19:16 (mapped to the
// SPSR/CPSR _flags / _status / _extension / _control bits). Encoded as:
//
// bit 19 = f (flags / N,Z,C,V,Q)
// bit 18 = s (status / IT[1:0]:reserved)
// bit 17 = x (extension / GE bits)
// bit 16 = c (control / mode bits, I, F, T)
//
// We expose a packed selector in low 4 bits of u8 / Operand.immediate:
// bit 3 = f, bit 2 = s, bit 1 = x, bit 0 = c
// Single-field selector bits, one per MSR field letter.
PSR_FIELD_F :: u8(0b1000) // f
PSR_FIELD_S :: u8(0b0100) // s
PSR_FIELD_X :: u8(0b0010) // x
PSR_FIELD_C :: u8(0b0001) // c
// Named combinations:
//   _nzcvq          = F
//   _g              = X
//   _nzcvqg         = F | X
//   _all (cpsr_all) = F | S | X | C
PSR_FIELD_NZCVQ  :: PSR_FIELD_F
PSR_FIELD_G      :: PSR_FIELD_X
PSR_FIELD_NZCVQG :: PSR_FIELD_NZCVQ | PSR_FIELD_G
PSR_FIELD_ALL    :: PSR_FIELD_C | PSR_FIELD_X | PSR_FIELD_S | PSR_FIELD_F
// Place a packed PSR field selector into instruction bits 19:16.
// Only the low four bits of `sel` are used.
@(require_results)
encode_psr_field :: #force_inline proc "contextless" (sel: u8) -> u32 {
	mask := u32(sel) & 0xF
	return mask << 16
}
// Read the packed PSR field selector back out of instruction bits 19:16.
@(require_results)
decode_psr_field :: #force_inline proc "contextless" (word: u32) -> u8 {
	shifted := word >> 16
	return u8(shifted) & 0xF
}