Files
Odin/core/rexcode/isa/x86/instructions.odin
Brendan Punsky 078015bc34 rexcode/x86: pre-matched encode hint + repair the typed builders
Targeted branchless revert + the pre-matched form fast path, and a fix
for a pre-existing bug the latter surfaced.

(a) Revert the two speculative-write spots from the prior branchless pass
    (legacy-prefix emission, widened displacement store, ENCODE_TAIL_SLACK)
    back to predicted branches. In real streams a legacy prefix is almost
    always absent and disp size is stable, so those branches are ~free and
    the unconditional stores only added work. Every class got faster
    (RET 19->17.5, MOV r,r 52->46.6, VADDPS 42.8->39.3 ns).

(b) Pre-matched form hint. Instruction.enc_hint (in the existing 11-byte
    padding, idx+1 biased; 0 = matcher path) lets a typed builder that maps
    to a single value-independent form bake the global form index, so
    encode() skips the O(forms) match scan -- and, in a varied stream, its
    unpredictable branches. Generated for non-immediate forms only (value-
    dependent imm8/imm32 selection stays on the matcher). On a 100k mixed
    typed-builder stream: 47.3 -> 30.2 ns/inst (-36%), byte-identical to the
    matcher path -- ~2x the original baseline for codegen.

Repair the typed inst_/emit_ builders. They were non-functional: the
generator cast the hw-only typed enum straight to Register
(Register(GPR64.RAX) -> class 0), so every typed-builder operand was
rejected by the matcher (encode returned empty). Untested because the
suite builds via the generic constructors. Now they build through the
class-correct op_gpr64/op_xmm/... path (op_* already used by 3+ operand
builders), emit_ reuses inst_, and a new 30-case consistency suite
asserts typed == generic (llvm-verified) and hint == matcher.

gen/builders/check/test/idempotent all green; 2276 cases.
2026-06-18 21:04:18 -04:00

262 lines
10 KiB
Odin

// rexcode · Brendan Punsky (dotbmp@github), original author
package rexcode_x86
// =============================================================================
// SECTION: 3. INSTRUCTION
// =============================================================================
// -----------------------------------------------------------------------------
// SECTION: 3.1 Instruction Flags and Rep Prefix
// -----------------------------------------------------------------------------
// Instruction flags for prefixes and modifiers.
// Packed into a single u8: the declared field widths (1 + 2 + 3 + 1 + 1) add up
// to exactly 8 bits. Field order determines bit position, so do not reorder.
Instruction_Flags :: bit_field u8 {
// LOCK prefix requested.
lock: bool | 1,
// Repeat prefix selector (see Rep); 2 bits wide.
rep: Rep | 2,
segment: u8 | 3, // 0=none, 1=ES, 2=CS, 3=SS, 4=DS, 5=FS, 6=GS
addr32: bool | 1, // address size override (32-bit in 64-bit mode)
data16: bool | 1, // operand size override (for 16-bit operands)
}
// Repeat-prefix selector stored in Instruction_Flags.rep.
// Values are spelled out explicitly; they match the implicit 0, 1, 2 numbering.
Rep :: enum u8 {
	NONE  = 0, // no repeat prefix
	REP   = 1, // REP/REPE/REPZ
	REPNE = 2, // REPNE/REPNZ
}
// -----------------------------------------------------------------------------
// SECTION: 3.2 Instruction Struct
// -----------------------------------------------------------------------------
// A single x86 instruction in its in-memory form: up to four operands plus the
// mnemonic, prefix flags, decoded length and an optional pre-matched encoding
// hint. The layout is packed and padded to exactly 64 bytes (checked by the
// #assert below).
Instruction :: struct #packed {
	// Operand slots. The fmt tag tells core:fmt to print only the first
	// `operand_count` entries; it needs the closing double quote, otherwise the
	// tag lookup fails and all four slots are printed.
	ops:           [4]Operand `fmt:"v,operand_count"`, // 48 bytes
	mnemonic:      Mnemonic,                           // 2 bytes
	operand_count: u8,                                 // 1 byte
	flags:         Instruction_Flags,                  // 1 byte
	length:        u8,                                 // 1 byte (filled by decoder, used for iteration)
	enc_hint:      u16,                                // 2 bytes (pre-matched form, +1 biased; 0 = none)
	_:             [9]u8,                              // 9 bytes of padding, bringing the total to 64
}
#assert(size_of(Instruction) == 64)
// Pre-matched encoding hint.
//
// A typed builder that maps to exactly one encoding form (no value-dependent
// immediate selection) stores `global_form_index + 1` in `Instruction.enc_hint`,
// which lets encode() skip the form-match scan.
//
// ENC_HINT_NONE (0) means "no hint". Because it is the zero value, every
// hand-built, generic-builder or decoded instruction stays on the matching
// path unchanged.
ENC_HINT_NONE: u16 : 0
// Returns a copy of `inst` whose `enc_hint` field is set to `hint`.
// `inst` is taken by value, so the caller's instruction is not modified.
// `hint` is stored as-is; no biasing or validation is done here.
@(require_results)
with_hint :: #force_inline proc "contextless" (inst: Instruction, hint: u16) -> Instruction {
	hinted := inst
	hinted.enc_hint = hint
	return hinted
}
// -----------------------------------------------------------------------------
// SECTION: 7.9 Instruction Builder Helpers
// -----------------------------------------------------------------------------
// Convenient instruction builders for common patterns
// Two operands: register destination, register source.
// Unused operand slots and every other field stay zero.
@(require_results)
inst_r_r :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination, source: Register) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 2
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_reg(source)
	return built
}
// Two operands: register destination, memory source.
// `size` is forwarded to op_mem as the memory operand's size.
@(require_results)
inst_r_m :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination: Register, source: Memory, size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 2
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_mem(source, size)
	return built
}
// Two operands: memory destination, register source.
// `size` is forwarded to op_mem as the memory operand's size.
@(require_results)
inst_m_r :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination: Memory, size: u8, source: Register) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 2
	built.ops[0] = op_mem(destination, size)
	built.ops[1] = op_reg(source)
	return built
}
// Two operands: register destination, immediate source.
// The immediate operand is built inline with kind .IMMEDIATE and the given size.
@(require_results)
inst_r_i :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination: Register, immediate: i64, immediate_size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 2
	built.ops[0] = op_reg(destination)
	built.ops[1] = Operand{immediate = immediate, kind = .IMMEDIATE, size = immediate_size}
	return built
}
// Single register operand.
@(require_results)
inst_r :: #force_inline proc "contextless" (mnemonic: Mnemonic, r: Register) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 1
	built.ops[0] = op_reg(r)
	return built
}
// Single memory operand; `size` is forwarded to op_mem.
@(require_results)
inst_m :: #force_inline proc "contextless" (mnemonic: Mnemonic, m: Memory, size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 1
	built.ops[0] = op_mem(m, size)
	return built
}
// Operand-less instruction: only the mnemonic is set, everything else is zero.
@(require_results)
inst_none :: #force_inline proc "contextless" (mnemonic: Mnemonic) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 0
	return built
}
// Single label operand built via op_label(label_id, size).
// `size` defaults to 4 when the caller omits it.
@(require_results)
inst_rel :: #force_inline proc "contextless" (mnemonic: Mnemonic, label_id: u32, size: u8 = 4) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 1
	built.ops[0] = op_label(label_id, size)
	return built
}
// Three register operands (VEX/EVEX style, e.g. VADDPS xmm0, xmm1, xmm2).
// Operand order: destination, source1, source2.
@(require_results)
inst_r_r_r :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination, source1, source2: Register) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 3
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_reg(source1)
	built.ops[2] = op_reg(source2)
	return built
}
// Three operands: register, register, memory (VEX/EVEX style, e.g. VADDPS xmm0, xmm1, [mem]).
// `size` is forwarded to op_mem for the memory operand.
@(require_results)
inst_r_r_m :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination, source1: Register, source2: Memory, size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 3
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_reg(source1)
	built.ops[2] = op_mem(source2, size)
	return built
}
// Three operands: register, register, immediate (e.g. SHLD r64, r64, imm8).
// The immediate operand is built inline with kind .IMMEDIATE and the given size.
@(require_results)
inst_r_r_i :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination, source: Register, immediate: i64, immediate_size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 3
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_reg(source)
	built.ops[2] = Operand{immediate = immediate, kind = .IMMEDIATE, size = immediate_size}
	return built
}
// Memory-immediate (e.g. MOV [mem], imm32).
// `size` describes the memory operand; `immediate_size` describes the immediate.
@(require_results)
inst_m_i :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination: Memory, size: u8, immediate: i64, immediate_size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 2
	built.ops[0] = op_mem(destination, size)
	built.ops[1] = Operand{immediate = immediate, kind = .IMMEDIATE, size = immediate_size}
	return built
}
// Single immediate operand (e.g. PUSH imm32, RET imm16, INT imm8).
@(require_results)
inst_i :: #force_inline proc "contextless" (mnemonic: Mnemonic, immediate: i64, immediate_size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 1
	built.ops[0] = Operand{immediate = immediate, kind = .IMMEDIATE, size = immediate_size}
	return built
}
// Three operands: register, memory, immediate (e.g. IMUL r64, m64, imm32).
// `mem_size` goes to op_mem; `immediate_size` goes on the immediate operand.
@(require_results)
inst_r_m_i :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination: Register, source: Memory, mem_size: u8, immediate: i64, immediate_size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 3
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_mem(source, mem_size)
	built.ops[2] = Operand{immediate = immediate, kind = .IMMEDIATE, size = immediate_size}
	return built
}
// Three operands: memory, register, immediate (e.g. SHLD m64, r64, imm8).
// `mem_size` goes to op_mem; `immediate_size` goes on the immediate operand.
@(require_results)
inst_m_r_i :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination: Memory, mem_size: u8, source: Register, immediate: i64, immediate_size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 3
	built.ops[0] = op_mem(destination, mem_size)
	built.ops[1] = op_reg(source)
	built.ops[2] = Operand{immediate = immediate, kind = .IMMEDIATE, size = immediate_size}
	return built
}
// Relative-offset operand (e.g. JMP rel8, JCC rel32) given as a raw offset, not a label.
// The offset is stored in the operand's `immediate` field with kind .RELATIVE.
@(require_results)
inst_rel_offset :: #force_inline proc "contextless" (mnemonic: Mnemonic, offset: i64, offset_size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 1
	built.ops[0] = Operand{immediate = offset, kind = .RELATIVE, size = offset_size}
	return built
}
// Three operands: register, memory, register (e.g. BEXTR r64, m64, r64).
// `mem_size` is forwarded to op_mem for the middle operand.
@(require_results)
inst_r_m_r :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination: Register, source1: Memory, mem_size: u8, source2: Register) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 3
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_mem(source1, mem_size)
	built.ops[2] = op_reg(source2)
	return built
}
// Four register operands (EVEX with 4 operands); fills every operand slot.
@(require_results)
inst_r_r_r_r :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination, source1, source2, source3: Register) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 4
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_reg(source1)
	built.ops[2] = op_reg(source2)
	built.ops[3] = op_reg(source3)
	return built
}
// Four operands: three registers followed by an immediate (e.g. VCMPPS xmm, xmm, xmm, imm8).
@(require_results)
inst_r_r_r_i :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination, source1, source2: Register, immediate: i64, immediate_size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 4
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_reg(source1)
	built.ops[2] = op_reg(source2)
	built.ops[3] = Operand{immediate = immediate, kind = .IMMEDIATE, size = immediate_size}
	return built
}
// Four operands: two registers, a memory operand, then an immediate
// (e.g. VCMPPS xmm, xmm, m128, imm8). `mem_size` is forwarded to op_mem.
@(require_results)
inst_r_r_m_i :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination, source1: Register, source2: Memory, mem_size: u8, immediate: i64, immediate_size: u8) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 4
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_reg(source1)
	built.ops[2] = op_mem(source2, mem_size)
	built.ops[3] = Operand{immediate = immediate, kind = .IMMEDIATE, size = immediate_size}
	return built
}
// Four operands: two registers, a memory operand, then a register
// (e.g. VBLENDVPS xmm, xmm, m128, xmm). `mem_size` is forwarded to op_mem.
@(require_results)
inst_r_r_m_r :: #force_inline proc "contextless" (mnemonic: Mnemonic, destination, source1: Register, source2: Memory, mem_size: u8, source3: Register) -> Instruction {
	built: Instruction
	built.mnemonic = mnemonic
	built.operand_count = 4
	built.ops[0] = op_reg(destination)
	built.ops[1] = op_reg(source1)
	built.ops[2] = op_mem(source2, mem_size)
	built.ops[3] = op_reg(source3)
	return built
}