mirror of
https://github.com/odin-lang/Odin.git
synced 2026-06-19 16:42:33 +00:00
rexcode/x86: emit-descriptor fast path (precompiled per-form recipe)
Precompute each encoding form into a flat Form_Recipe -- prefix byte, escape+
opcode blob, role->operand-index slots, ext, imm size, flags -- so the encoder
replays common forms straight-line instead of re-interpreting enc.ops/enc.enc
on every instruction (the resolve scan, escape ladder, prefix/REX selection).
encode() takes the fast path when the form is hinted, eligible, has a register
r/m and a literal immediate; everything else falls through to the existing
interpreter, which stays the byte-exact source of truth. First cut:
- reg-direct ModR/M only (memory r/m falls back)
- hint path only (matcher / generic builders fall back)
- ~33% of forms eligible; the rest (VEX/EVEX, 16-bit operand-size, x87 fixed-ModR/M,
moffs/far/rel/implicit operands) are marked ineligible
Recipes are built at startup into static storage (no heap); this moves into the
table generator (#loaded like every other table) once the shape settles.
Realistic immediate-heavy mix: ~19.0 -> ~16.3 ns/inst (52.7 -> 61.3 M/s).
Byte-exact across 2282 cases + idempotent.
Next: memory r/m addressing in the fast path, then the matcher path, then the
gen-time port.
This commit is contained in:
@@ -141,6 +141,7 @@ encode :: proc(
|
||||
}
|
||||
|
||||
matched_enc: ^Encoding = nil
|
||||
form_index := -1 // index into ENCODE_FORMS / ENCODE_RECIPES; -1 = no recipe fast path
|
||||
|
||||
// Pre-matched form fast-path: a typed builder that maps to a single
|
||||
// encoding form bakes `global_index + 1` into enc_hint, letting us skip
|
||||
@@ -149,7 +150,8 @@ encode :: proc(
|
||||
// in long mode (the builders' target); bounds-checked; anything else
|
||||
// (hand-built, generic builders, i386, decode) falls back to matching.
|
||||
if mode == ._64 && inst.enc_hint != ENC_HINT_NONE && int(inst.enc_hint) <= len(ENCODE_FORMS) {
|
||||
matched_enc = &ENCODE_FORMS[inst.enc_hint - 1]
|
||||
form_index = int(inst.enc_hint) - 1
|
||||
matched_enc = &ENCODE_FORMS[form_index]
|
||||
} else {
|
||||
// Find matching encoding from table (O(1) mnemonic lookup)
|
||||
encodings := encoding_forms(inst.mnemonic)
|
||||
@@ -174,6 +176,23 @@ encode :: proc(
|
||||
}
|
||||
}
|
||||
|
||||
// Recipe fast path: for a hinted, eligible form with a register r/m and a
|
||||
// literal immediate, emit straight-line from the precomputed recipe and
|
||||
// skip the interpreter (resolve scan, prefix/REX/escape selection) below.
|
||||
// Anything outside that envelope falls through to the interpreter, which
|
||||
// stays the byte-exact source of truth.
|
||||
if form_index >= 0 && form_index < len(ENCODE_RECIPES) {
|
||||
recipe := &ENCODE_RECIPES[form_index]
|
||||
if recipe.flags.eligible && transmute(u8)inst.flags == 0 {
|
||||
rm_reg := recipe.rm_op < 0 || inst.ops[recipe.rm_op].kind == .REGISTER
|
||||
imm_lit := recipe.imm_op < 0 || inst.ops[recipe.imm_op].kind == .IMMEDIATE
|
||||
if rm_reg && imm_lit {
|
||||
byte_count += emit_recipe(recipe, &inst, code[byte_count:])
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =====================================================================
|
||||
// ENCODE INSTRUCTION (fully inlined hot path)
|
||||
// =====================================================================
|
||||
@@ -715,8 +734,8 @@ encode :: proc(
|
||||
|
||||
// Branchless select mask: 0xFF when `b`, else 0x00. Used to OR-accumulate
|
||||
// REX/VEX/EVEX bit contributions without a per-condition branch
|
||||
// (`x |= bmask(cond) & bits`).
|
||||
@(private="file")
|
||||
// (`x |= bmask(cond) & bits`). Package-private so the recipe emitter shares it.
|
||||
@(private)
bmask :: #force_inline proc "contextless" (b: bool) -> u8 {
	// Select mask without a branch: `true` yields 0xFF, `false` yields 0x00.
	// u8(b) is 1 or 0, and unsigned subtraction wraps, so 0 - 1 == 0xFF.
	// Intended use is `x |= bmask(cond) & bits`.
	return u8(0) - u8(b)
}
|
||||
|
||||
240
core/rexcode/isa/x86/encoder_recipe.odin
Normal file
240
core/rexcode/isa/x86/encoder_recipe.odin
Normal file
@@ -0,0 +1,240 @@
|
||||
// rexcode · Brendan Punsky (dotbmp@github), original author
|
||||
|
||||
package rexcode_x86
|
||||
|
||||
// =============================================================================
|
||||
// SECTION: 7.x Emit Descriptor (precompiled per-form recipe)
|
||||
// =============================================================================
|
||||
//
|
||||
// Each ENCODE_FORMS entry is a compact description the encoder *interprets* at
|
||||
// emit time: walk enc.ops to map operands to slots, switch on the escape ladder,
|
||||
// select the mandatory prefix, decide the ModR/M reg source, etc. For the common
|
||||
// legacy/SSE forms that work is identical on every instruction that shares the
|
||||
// form, so we precompute it once into a flat Form_Recipe that the hot path can
|
||||
// replay straight-line.
|
||||
//
|
||||
// Anything the flat recipe can't represent verbatim -- VEX/EVEX, 16-bit
|
||||
// operand-size (66h), x87 fixed-ModR/M, moffs/far/rel/implicit operands -- is
|
||||
// marked `eligible = false` and falls back to the existing interpreter, which
|
||||
// stays the source of truth for correctness.
|
||||
//
|
||||
// NOTE(interim): ENCODE_RECIPES is currently built at startup from the #loaded
|
||||
// ENCODE_FORMS (static storage, no heap). Once the fast path is validated this
|
||||
// moves into the table generator -- serialized and #loaded like every other
|
||||
// table, with no @init.
|
||||
|
||||
Form_Recipe :: struct {
	// Mandatory legacy prefix byte written ahead of REX; 0 means "no prefix".
	prefix:     u8,
	// Escape bytes followed by the opcode byte:
	// [op] / [0F,op] / [0F,38,op] / [0F,3A,op].
	opcode:     [3]u8,
	// Number of meaningful bytes in `opcode` (1..3).
	opcode_len: u8,
	// The form's ext value; its low three bits become ModR/M.reg when
	// flags.reg_from_ext is set.
	ext:        u8,
	// User operand index feeding each encoded role (-1 = role unused):
	//   rm_op  -> ModR/M r/m field
	//   reg_op -> ModR/M reg field
	//   opr_op -> register folded into the opcode byte (+rb)
	//   imm_op -> immediate
	rm_op, reg_op, opr_op, imm_op: i8,
	// Immediate width in bytes (1/2/4/8); meaningful only when imm_op >= 0.
	imm_size:   u8,
	flags:      Recipe_Flags,
}
|
||||
|
||||
Recipe_Flags :: bit_field u8 {
	// Form may be emitted through the recipe fast path; when clear the
	// encoder falls back to the interpreter.
	eligible:     bool | 1,
	// ModR/M.reg carries the ext digit (opcode extension) rather than reg_op.
	reg_from_ext: bool | 1,
	// A ModR/M byte is emitted (an rm or reg operand is present).
	has_modrm:    bool | 1,
	// REX.W is always emitted for this form.
	force_rex_w:  bool | 1,
	// 8-bit form: an operand may be SPL/BPL/SIL/DIL, which forces a REX byte.
	could_spl:    bool | 1,
	// Operand size defaults to 64-bit (PUSH/POP/CALL/...).
	default_64:   bool | 1,
}
|
||||
|
||||
// Derive the flat recipe for one encoding form. Pure; identical whether called
// here at startup or (later) from the table generator.
//
// Inputs:
//   enc - the encoding form to flatten (read only).
//
// Returns:
//   r - the recipe. `r.flags.eligible` is set only when every explicit operand
//       of the form fits the flat layout; the other fields are filled in
//       either way, but the encoder consults them only for eligible recipes.
//
// A form is marked ineligible when any of the following holds:
//   - it is VEX/EVEX encoded, or an operand uses a VVVV / AAA / IS4 role
//   - it has a 16-bit operand (R16/RM16/M16/IMM16)
//   - it has an implicit operand
//   - it has a rel/moffs/far-pointer/segment/control/debug/x87/MMX/mask operand
//   - it emits `ext` as a literal ModR/M byte (x87 ST(i) / 0F NOP-class forms)
//   - two explicit operands map to the same role (e.g. two immediates): each
//     role has a single slot, so the second would overwrite the first and the
//     emitter would drop an operand
@(require_results)
form_to_recipe :: proc "contextless" (enc: ^Encoding) -> (r: Form_Recipe) {
	r.rm_op, r.reg_op, r.opr_op, r.imm_op = -1, -1, -1, -1

	// Escape + opcode blob.
	switch enc.flags.esc {
	case .NONE:  r.opcode = {enc.opcode, 0, 0};       r.opcode_len = 1
	case ._0F:   r.opcode = {0x0F, enc.opcode, 0};    r.opcode_len = 2
	case ._0F38: r.opcode = {0x0F, 0x38, enc.opcode}; r.opcode_len = 3
	case ._0F3A: r.opcode = {0x0F, 0x3A, enc.opcode}; r.opcode_len = 3
	}

	// Mandatory-prefix selector -> prefix byte (0 = none).
	mand := [4]u8{0, 0x66, 0xF3, 0xF2}
	r.prefix = mand[enc.flags.prefix]
	r.ext = enc.ext
	r.flags.reg_from_ext = enc.flags.modrm_reg_ext
	r.flags.force_rex_w = enc.flags.force_rex_w
	r.flags.default_64 = enc.flags.default_64

	eligible := enc.flags.vex_type == .NONE
	has_16bit := false
	has_8bit := false
	has_implicit := false
	has_exotic := false

	// How many explicit operands landed in each single-valued role slot.
	n_rm, n_reg, n_opr, n_imm := 0, 0, 0, 0

	// Walk the form's operands, mapping each encoded role to the *user* operand
	// index (implicit operands are not user-provided and don't advance it).
	user_idx := 0
	for op_type, i in enc.ops {
		if op_type == .NONE { break }
		if is_implicit_op_inline(op_type) {
			has_implicit = true
			continue
		}
		role_idx := i8(user_idx)
		user_idx += 1

		#partial switch op_type {
		case .R16, .RM16, .M16, .IMM16:
			has_16bit = true
		case .R8, .RM8, .M8:
			has_8bit = true
		case .REL8, .REL32, .MOFFS8, .MOFFS16, .MOFFS32, .MOFFS64,
		     .PTR16_16, .PTR16_32, .PTR16_64, .M16_16, .M16_32, .M16_64,
		     .SREG, .CR, .DR, .STI, .MM, .MM_M64,
		     .K, .K_M8, .K_M16, .K_M32, .K_M64:
			has_exotic = true
		}

		#partial switch enc.enc[i] {
		case .MR:   r.rm_op  = role_idx; n_rm  += 1
		case .REG:  r.reg_op = role_idx; n_reg += 1
		case .OP_R: r.opr_op = role_idx; n_opr += 1
		case .IB:   r.imm_op = role_idx; r.imm_size = 1; n_imm += 1
		case .IW:   r.imm_op = role_idx; r.imm_size = 2; n_imm += 1
		case .ID:   r.imm_op = role_idx; r.imm_size = 4; n_imm += 1
		case .IQ:   r.imm_op = role_idx; r.imm_size = 8; n_imm += 1
		case .VVVV, .AAA, .IS4:
			eligible = false
		}
	}

	r.flags.has_modrm = r.rm_op >= 0 || r.reg_op >= 0
	r.flags.could_spl = has_8bit

	// x87 ST(i) / 0F NOP-class forms emit enc.ext as a literal ModR/M byte; the
	// fast path doesn't model that, so they fall back.
	is_x87 := enc.opcode >= 0xD8 && enc.opcode <= 0xDF
	fixed_modrm := enc.ext >= 0xC0 && !r.flags.has_modrm && (enc.flags.esc != .NONE || is_x87)

	// Each role slot holds exactly one operand index. A form that assigns a
	// role twice cannot be replayed from the recipe, so it must fall back.
	one_per_role := n_rm <= 1 && n_reg <= 1 && n_opr <= 1 && n_imm <= 1

	r.flags.eligible = eligible && one_per_role && !has_16bit && !has_implicit && !has_exotic && !fixed_modrm
	return
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Interim recipe table: built once at startup from the #loaded forms into static
|
||||
// storage (no heap). ENCODE_RECIPES is parallel to ENCODE_FORMS.
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// Upper bound on the number of recipes held in the static backing array.
@(private)
ENCODE_RECIPE_CAP :: 4096

// Fixed-size backing store for ENCODE_RECIPES.
@(private)
encode_recipes_storage: [ENCODE_RECIPE_CAP]Form_Recipe

// View over the populated prefix of the backing store; entry i is derived from
// ENCODE_FORMS[i]. Empty until build_encode_recipes has run.
ENCODE_RECIPES: []Form_Recipe

// Startup hook: derive one recipe per encoding form and publish the slice.
// Forms beyond ENCODE_RECIPE_CAP get no recipe entry.
@(init)
build_encode_recipes :: proc "contextless" () {
	count := len(ENCODE_FORMS)
	if count > ENCODE_RECIPE_CAP {
		count = ENCODE_RECIPE_CAP
	}
	for idx := 0; idx < count; idx += 1 {
		encode_recipes_storage[idx] = form_to_recipe(&ENCODE_FORMS[idx])
	}
	ENCODE_RECIPES = encode_recipes_storage[:count]
}
|
||||
|
||||
@(private)
op_is_spl :: #force_inline proc "contextless" (op: ^Operand) -> bool {
	// True for a GPR8 register operand whose hardware number is 4..7, i.e.
	// SPL/BPL/SIL/DIL. Those need a REX byte to be encoded; without one the
	// same numbers read as AH/CH/DH/BH.
	if op.kind != .REGISTER || reg_class(op.reg) != REG_GPR8 {
		return false
	}
	hw := reg_hw(op.reg)
	return hw >= 4 && hw <= 7
}
|
||||
|
||||
// Straight-line emitter driven by a precomputed Form_Recipe.
//
// Covers the eligible legacy/SSE forms whose r/m operand is a register (or
// absent) and whose immediate, if any, is a literal; the caller checks those
// conditions before calling. Output is meant to be byte-identical to the
// interpreter's.
//
// Inputs:
//   recipe - precomputed recipe for the instruction's form
//   inst   - instruction whose operands supply registers and the immediate
//   out    - destination bytes, written from index 0
//
// Returns:
//   pos - number of bytes written to `out`
//
// Byte order: [mandatory prefix] [REX] opcode bytes [ModR/M] [immediate].
@(require_results)
emit_recipe :: #force_inline proc "contextless" (recipe: ^Form_Recipe, inst: ^Instruction, out: []u8) -> (pos: u32) {
	flags   := recipe.flags
	rm_idx  := recipe.rm_op
	reg_idx := recipe.reg_op
	opr_idx := recipe.opr_op

	// Mandatory prefix (66/F3/F2); operand-size 66h forms never reach here.
	if p := recipe.prefix; p != 0 {
		out[pos] = p
		pos += 1
	}

	// REX: start from W when forced, then OR in R (reg role) and B (r/m and
	// +rb roles) for registers that need the extension bit. No memory
	// base/index contributes here.
	rex: u8
	if flags.force_rex_w {
		rex = 0x48
	}
	if reg_idx >= 0 {
		r := &inst.ops[reg_idx]
		rex |= bmask(r.kind == .REGISTER && reg_needs_rex(r.reg)) & 0x44
	}
	if rm_idx >= 0 {
		m := &inst.ops[rm_idx]
		rex |= bmask(m.kind == .REGISTER && reg_needs_rex(m.reg)) & 0x41
	}
	if opr_idx >= 0 {
		o := &inst.ops[opr_idx]
		rex |= bmask(o.kind == .REGISTER && reg_needs_rex(o.reg)) & 0x41
	}

	// 8-bit forms: SPL/BPL/SIL/DIL need a bare REX (0x40) when nothing else
	// has already requested one.
	if rex == 0 && flags.could_spl {
		wants_bare_rex := (rm_idx  >= 0 && op_is_spl(&inst.ops[rm_idx]))  ||
		                  (reg_idx >= 0 && op_is_spl(&inst.ops[reg_idx])) ||
		                  (opr_idx >= 0 && op_is_spl(&inst.ops[opr_idx]))
		if wants_bare_rex {
			rex = 0x40
		}
	}
	if rex != 0 {
		out[pos] = rex
		pos += 1
	}

	// Opcode bytes. For +rb forms the low three bits of the register number
	// are added to the final opcode byte of a local copy.
	opcode_bytes := recipe.opcode
	if opr_idx >= 0 {
		o := &inst.ops[opr_idx]
		if o.kind == .REGISTER {
			opcode_bytes[recipe.opcode_len - 1] += reg_hw(o.reg) & 0x7
		}
	}
	for k in 0..<recipe.opcode_len {
		out[pos] = opcode_bytes[k]
		pos += 1
	}

	// ModR/M in register-direct form (mod = 11). The reg field is the ext
	// digit for opcode-extension forms, otherwise the reg operand's number.
	if flags.has_modrm {
		modrm: u8 = 0xC0
		if flags.reg_from_ext {
			modrm |= (recipe.ext & 0x7) << 3
		} else if reg_idx >= 0 {
			r := &inst.ops[reg_idx]
			if r.kind == .REGISTER {
				modrm |= (reg_hw(r.reg) & 0x7) << 3
			}
		}
		if rm_idx >= 0 {
			m := &inst.ops[rm_idx]
			if m.kind == .REGISTER {
				modrm |= reg_hw(m.reg) & 0x7
			}
		}
		out[pos] = modrm
		pos += 1
	}

	// Literal immediate, little-endian, imm_size bytes wide. Relative/label
	// immediates are filtered out by the caller.
	if recipe.imm_op >= 0 {
		imm := u64(inst.ops[recipe.imm_op].immediate)
		switch recipe.imm_size {
		case 1:
			out[pos] = u8(imm)
			pos += 1
		case 2:
			out[pos]     = u8(imm)
			out[pos + 1] = u8(imm >> 8)
			pos += 2
		case 4:
			out[pos]     = u8(imm)
			out[pos + 1] = u8(imm >> 8)
			out[pos + 2] = u8(imm >> 16)
			out[pos + 3] = u8(imm >> 24)
			pos += 4
		case 8:
			out[pos]     = u8(imm)
			out[pos + 1] = u8(imm >> 8)
			out[pos + 2] = u8(imm >> 16)
			out[pos + 3] = u8(imm >> 24)
			out[pos + 4] = u8(imm >> 32)
			out[pos + 5] = u8(imm >> 40)
			out[pos + 6] = u8(imm >> 48)
			out[pos + 7] = u8(imm >> 56)
			pos += 8
		}
	}

	return
}
|
||||
Reference in New Issue
Block a user