mirror of
https://github.com/odin-lang/Odin.git
synced 2026-06-19 08:32:33 +00:00
rexcode/x86: pre-matched encode hint + repair the typed builders
Targeted branchless revert + the pre-matched form fast path, and a fix
for a pre-existing bug the latter surfaced.
(a) Revert the two speculative-write spots from the prior branchless pass
(legacy-prefix emission and the widened displacement store, together with
the ENCODE_TAIL_SLACK reserve that the widened store required)
back to predicted branches. In real streams a legacy prefix is almost
always absent and disp size is stable, so those branches are ~free and
the unconditional stores only added work. Every class got faster
(RET 19->17.5, MOV r,r 52->46.6, VADDPS 42.8->39.3 ns).
(b) Pre-matched form hint. Instruction.enc_hint (in the existing 11-byte
padding, idx+1 biased; 0 = matcher path) lets a typed builder that maps
to a single value-independent form bake the global form index, so
encode() skips the O(forms) match scan -- and, in a varied stream, its
unpredictable branches. Generated only for forms with no immediate or
relative operand (value-dependent imm8/imm32 and rel8/rel32 selection
stays on the matcher). On a 100k mixed
typed-builder stream: 47.3 -> 30.2 ns/inst (-36%), byte-identical to the
matcher path -- ~2x the original baseline for codegen.
Repair the typed inst_/emit_ builders. They were non-functional: the
generator cast the hw-only typed enum straight to Register
(Register(GPR64.RAX) -> class 0), so every typed-builder operand was
rejected by the matcher (encode returned empty). Untested because the
suite builds via the generic constructors. Now they build through the
class-correct op_gpr64/op_xmm/... path (op_* already used by 3+ operand
builders), emit_ reuses inst_, and a new 30-case consistency suite
asserts typed == generic (llvm-verified) and hint == matcher.
gen/builders/check/test/idempotent all green; 2276 cases.
This commit is contained in:
@@ -33,13 +33,6 @@ import "core:rexcode/isa"
|
||||
|
||||
MAX_INST_SIZE :: 15 // Maximum x64 instruction length
|
||||
|
||||
// Extra bytes reserved past each instruction so the branchless emitters can
|
||||
// write a few speculative bytes beyond the logical end (e.g. a widened 4-byte
|
||||
// displacement store when only a disp8 is kept). The over-written tail is
|
||||
// reclaimed by the next emit; this slack just guarantees the wide store stays
|
||||
// in bounds even for the final instruction against a tight buffer.
|
||||
ENCODE_TAIL_SLACK :: 8
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// SECTION: 7.6 Core Encoding Function
|
||||
@@ -107,7 +100,7 @@ encode :: proc(
|
||||
}
|
||||
|
||||
// Check buffer space
|
||||
if byte_count + MAX_INST_SIZE + ENCODE_TAIL_SLACK > u32(len(code)) {
|
||||
if byte_count + MAX_INST_SIZE > u32(len(code)) {
|
||||
append(errors, Error{u32(instruction_index), .BUFFER_OVERFLOW, {}})
|
||||
ok = false
|
||||
continue
|
||||
@@ -147,27 +140,38 @@ encode :: proc(
|
||||
}
|
||||
}
|
||||
|
||||
// Find matching encoding from table (O(1) mnemonic lookup)
|
||||
encodings := encoding_forms(inst.mnemonic)
|
||||
if len(encodings) == 0 {
|
||||
append(errors, Error{u32(instruction_index), .INVALID_MNEMONIC, {}})
|
||||
ok = false
|
||||
continue
|
||||
}
|
||||
|
||||
// Find the first encoding that matches operands
|
||||
matched_enc: ^Encoding = nil
|
||||
for &e in encodings {
|
||||
if encoding_matches_inline(&inst, &e, mode) {
|
||||
matched_enc = &e
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if matched_enc == nil {
|
||||
append(errors, Error{u32(instruction_index), .NO_MATCHING_ENCODING, {}})
|
||||
ok = false
|
||||
continue
|
||||
// Pre-matched form fast-path: a typed builder that maps to a single
|
||||
// encoding form bakes `global_index + 1` into enc_hint, letting us skip
|
||||
// the O(forms) match scan entirely -- and with it the scan's branches,
|
||||
// which are the unpredictable ones in a varied instruction stream. Only
|
||||
// in long mode (the builders' target); bounds-checked; anything else
|
||||
// (hand-built, generic builders, i386, decode) falls back to matching.
|
||||
if mode == ._64 && inst.enc_hint != ENC_HINT_NONE && int(inst.enc_hint) <= len(ENCODE_FORMS) {
|
||||
matched_enc = &ENCODE_FORMS[inst.enc_hint - 1]
|
||||
} else {
|
||||
// Find matching encoding from table (O(1) mnemonic lookup)
|
||||
encodings := encoding_forms(inst.mnemonic)
|
||||
if len(encodings) == 0 {
|
||||
append(errors, Error{u32(instruction_index), .INVALID_MNEMONIC, {}})
|
||||
ok = false
|
||||
continue
|
||||
}
|
||||
|
||||
// Find the first encoding that matches operands
|
||||
for &e in encodings {
|
||||
if encoding_matches_inline(&inst, &e, mode) {
|
||||
matched_enc = &e
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if matched_enc == nil {
|
||||
append(errors, Error{u32(instruction_index), .NO_MATCHING_ENCODING, {}})
|
||||
ok = false
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// =====================================================================
|
||||
@@ -208,34 +212,37 @@ encode :: proc(
|
||||
}
|
||||
has_modrm := mr_slot >= 0 || reg_slot >= 0
|
||||
|
||||
// --- Legacy Prefixes (branchless) ---
|
||||
// --- Legacy Prefixes ---
|
||||
//
|
||||
// Each optional prefix byte is written *speculatively* at `pos`, then
|
||||
// `pos` advances only if the prefix is actually present. When absent the
|
||||
// speculative byte is overwritten by the next emit (the opcode always
|
||||
// writes at `pos`), so the final stream is identical to the branching
|
||||
// form -- with four data-dependent branches removed. The buffer carries
|
||||
// MAX_INST_SIZE slack (checked above), so the spec writes stay in bounds.
|
||||
// Kept as predicted branches: in real instruction streams a legacy
|
||||
// prefix is almost always absent, so these are ~100% predicted-not-taken
|
||||
// (free), and the branchless speculative-write form only added four
|
||||
// unconditional stores per instruction for no win. See git history.
|
||||
|
||||
// Lock prefix (F0)
|
||||
out[pos] = 0xF0
|
||||
pos += u32(inst.flags.lock && enc.flags.lock_ok)
|
||||
if inst.flags.lock && enc.flags.lock_ok {
|
||||
out[pos] = 0xF0
|
||||
pos += 1
|
||||
}
|
||||
|
||||
// Rep/Repne prefix (NONE -> 0, REP -> F3, REPNE -> F2)
|
||||
REP_BYTE := [Rep]u8{ .NONE = 0, .REP = 0xF3, .REPNE = 0xF2 }
|
||||
rep_b := REP_BYTE[inst.flags.rep]
|
||||
out[pos] = rep_b
|
||||
pos += u32(rep_b != 0)
|
||||
// Rep/Repne prefix
|
||||
#partial switch inst.flags.rep {
|
||||
case .REP: out[pos] = 0xF3; pos += 1
|
||||
case .REPNE: out[pos] = 0xF2; pos += 1
|
||||
}
|
||||
|
||||
// Segment override (table already maps 0 -> 0)
|
||||
seg_prefix := [8]u8{0, 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65, 0}
|
||||
seg_b := seg_prefix[inst.flags.segment]
|
||||
out[pos] = seg_b
|
||||
pos += u32(seg_b != 0)
|
||||
// Segment override
|
||||
if inst.flags.segment != 0 {
|
||||
seg_prefix := [8]u8{0, 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65, 0}
|
||||
out[pos] = seg_prefix[inst.flags.segment]
|
||||
pos += 1
|
||||
}
|
||||
|
||||
// Address size override (67h)
|
||||
out[pos] = 0x67
|
||||
pos += u32(inst.flags.addr32)
|
||||
if inst.flags.addr32 {
|
||||
out[pos] = 0x67
|
||||
pos += 1
|
||||
}
|
||||
|
||||
// --- VEX/EVEX or Legacy Encoding ---
|
||||
|
||||
@@ -580,15 +587,14 @@ encode :: proc(
|
||||
pos += 1
|
||||
}
|
||||
|
||||
// Displacement: four unconditional little-endian byte stores, then
|
||||
// advance by the real size (0/1/4) -- no data-dependent loop. The
|
||||
// untaken tail bytes are reclaimed by the next emit; ENCODE_TAIL_SLACK
|
||||
// keeps the widened store in bounds.
|
||||
out[pos+0] = u8(disp)
|
||||
out[pos+1] = u8(disp >> 8)
|
||||
out[pos+2] = u8(disp >> 16)
|
||||
out[pos+3] = u8(disp >> 24)
|
||||
pos += u32(displacement_size)
|
||||
// Displacement: bounded little-endian emit. Kept as a counted loop
|
||||
// (0/1/4 trips, highly predictable per code pattern) so no buffer
|
||||
// tail-slack is needed and no bytes are written past the real size.
|
||||
for _ in 0..<displacement_size {
|
||||
out[pos] = u8(disp & 0xFF)
|
||||
disp >>= 8
|
||||
pos += 1
|
||||
}
|
||||
}
|
||||
|
||||
// Fixed ModR/M for special instructions. Triggered for:
|
||||
@@ -912,7 +918,7 @@ imm_matches_inline :: #force_inline proc "contextless" (op: ^Operand, op_type: O
|
||||
|
||||
// Compute safe buffer sizes for encoding
|
||||
encode_max_code_size :: #force_inline proc "contextless" (n: int) -> int {
|
||||
return n * MAX_INST_SIZE + ENCODE_TAIL_SLACK
|
||||
return n * MAX_INST_SIZE
|
||||
}
|
||||
|
||||
encode_max_relocation_count :: #force_inline proc "contextless" (n: int) -> int {
|
||||
|
||||
@@ -35,10 +35,25 @@ Instruction :: struct #packed {
|
||||
operand_count: u8, // 1 byte
|
||||
flags: Instruction_Flags, // 1 byte
|
||||
length: u8, // 1 byte (filled by decoder, used for iteration)
|
||||
_: [11]u8, // 11 bytes
|
||||
enc_hint: u16, // 2 bytes (pre-matched form, +1 biased; 0 = none)
|
||||
_: [9]u8, // 9 bytes
|
||||
}
|
||||
#assert(size_of(Instruction) == 64)
|
||||
|
||||
// Pre-matched encoding hint: a typed builder that maps to exactly one encoding
|
||||
// form (no value-dependent immediate selection) stores `global_form_index + 1`
|
||||
// in `Instruction.enc_hint`, letting encode() skip the form-match scan. 0 means
|
||||
// "no hint" -- the zero value, so every hand-built / generic-builder / decoded
|
||||
// instruction stays on the matching path unchanged.
|
||||
ENC_HINT_NONE :: u16(0)
|
||||
|
||||
// with_hint returns a copy of `inst` whose `enc_hint` field is set to `hint`.
//
// `inst` is received by value, so the caller's instruction is left untouched;
// only the returned copy carries the hint. `hint` is stored as given: callers
// pass the +1-biased form index, and ENC_HINT_NONE (0) means "no hint".
@(require_results)
with_hint :: #force_inline proc "contextless" (inst: Instruction, hint: u16) -> Instruction {
	hinted := inst
	hinted.enc_hint = hint
	return hinted
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// SECTION: 7.9 Instruction Builder Helpers
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -3139,6 +3139,101 @@ print_summary :: proc() {
|
||||
fmt.printf("%s======================================================================%s\n", BOLD, RESET)
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// TYPED BUILDER CONSISTENCY
|
||||
// =============================================================================
|
||||
//
|
||||
// The generated typed builders (inst_<mnem>/emit_<mnem>) were previously
|
||||
// untested. A class-dropping register cast made every one of them encode to
|
||||
// nothing, and the pre-matched enc_hint fast path needs guarding. Each case
|
||||
// asserts the typed builder is byte-identical to the llvm-verified generic
|
||||
// builder AND that the baked enc_hint matches the matcher path (hint cleared).
|
||||
|
||||
@(private="file") tb_a: [64]u8
|
||||
@(private="file") tb_b: [64]u8
|
||||
|
||||
// tb_enc encodes the single instruction `inst` into `buf` and returns the
// leading part of `buf` that x86.encode reported as written.
//
// The returned slice aliases `buf`; it is only valid until `buf` is reused.
// The second result of x86.encode is discarded, and the relocation and error
// lists are throwaway scratch that is freed before returning.
@(private="file")
tb_enc :: proc(inst: x86.Instruction, buf: []u8) -> []u8 {
	relocs: [dynamic]x86.Relocation
	errors: [dynamic]x86.Error
	defer {
		delete(relocs)
		delete(errors)
	}

	written, _ := x86.encode({inst}, nil, buf, &relocs, &errors)
	return buf[:written]
}
|
||||
|
||||
// tb_eq reports whether `a` and `b` contain the same, non-empty byte sequence.
//
// Empty input never compares equal (not even to another empty slice), so a
// comparison involving a zero-length encoding is always reported as a mismatch.
@(private="file")
tb_eq :: proc(a, b: []u8) -> bool {
	count := len(a)
	if count == 0 || count != len(b) {
		return false
	}
	for i in 0..<count {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}
|
||||
|
||||
// tb_check runs one typed-builder consistency case and records the outcome in
// g_stats.
//
// Inputs:
//   name    - label printed if the case fails
//   typed   - instruction produced by a generated typed builder
//   generic - the same instruction produced by a generic constructor
//
// Both conditions must hold for the case to pass:
//   typed_ok - `typed` and `generic` encode to the same non-empty bytes
//   hint_ok  - `typed` encodes to the same bytes with its enc_hint cleared
//              (the matcher path) as it does with the hint left in place
//
// On success g_stats.passed and g_stats.cases_validated are incremented; on
// failure g_stats.failed is incremented and a FAIL line with both encodings is
// printed.
@(private="file")
tb_check :: proc(name: string, typed, generic: x86.Instruction) {
	t := tb_enc(typed, tb_a[:])
	g := tb_enc(generic, tb_b[:])
	typed_ok := tb_eq(t, g)

	// The hint-cleared encode gets its own scratch buffer. `g` is a view into
	// tb_b, so encoding into tb_b again would overwrite the generic bytes that
	// the FAIL line below prints, making the diagnostic show the wrong data.
	matcher_buf: [64]u8
	cleared := typed
	cleared.enc_hint = 0
	hint_ok := tb_eq(t, tb_enc(cleared, matcher_buf[:]))

	if typed_ok && hint_ok {
		g_stats.passed += 1
		g_stats.cases_validated += 1
	} else {
		g_stats.failed += 1
		fmt.printf(" %sFAIL%s %s: typed=% x generic=% x (typed_ok=%v hint_ok=%v)\n",
			RED, RESET, name, t, g, typed_ok, hint_ok)
	}
}
|
||||
|
||||
// run_typed_builder_tests feeds a fixed table of (typed builder, generic
// builder) instruction pairs through tb_check, in table order. Each pair is
// expected to describe the same instruction; tb_check does the comparison and
// the pass/fail bookkeeping.
run_typed_builder_tests :: proc() {
	Case :: struct {
		name:    string,
		typed:   x86.Instruction,
		generic: x86.Instruction,
	}

	// Memory operands shared by the reg-mem / mem-reg cases.
	md8  := x86.mem_base_disp(x86.RBP, -16)
	md32 := x86.mem_base_disp(x86.RCX, 100000)
	mbi  := x86.mem_base_index_disp(x86.R8, x86.RDX, 4, 32)
	mrip := x86.mem_rip_disp(0)

	cases := [?]Case{
		// GPR reg-reg, every size (r16 exercises the 66h class-dependent prefix)
		{"mov r8,r8",   x86.inst_mov_r8_r8(.AL, .BL),       x86.inst_r_r(.MOV, x86.AL, x86.BL)},
		{"mov r16,r16", x86.inst_mov_r16_r16(.AX, .BX),     x86.inst_r_r(.MOV, x86.AX, x86.BX)},
		{"mov r32,r32", x86.inst_mov_r32_r32(.EAX, .EDX),   x86.inst_r_r(.MOV, x86.EAX, x86.EDX)},
		{"mov r64,r64", x86.inst_mov_r64_r64(.RAX, .RBX),   x86.inst_r_r(.MOV, x86.RAX, x86.RBX)},
		{"mov r64 ext", x86.inst_mov_r64_r64(.R8, .R15),    x86.inst_r_r(.MOV, x86.R8, x86.R15)},
		{"mov r32 ext", x86.inst_mov_r32_r32(.R9D, .R10D),  x86.inst_r_r(.MOV, x86.R9D, x86.R10D)},

		// GPR arithmetic/logical reg-reg
		{"add r64,r64", x86.inst_add_r64_r64(.RAX, .RCX),   x86.inst_r_r(.ADD, x86.RAX, x86.RCX)},
		{"sub r64,r64", x86.inst_sub_r64_r64(.RSI, .RDI),   x86.inst_r_r(.SUB, x86.RSI, x86.RDI)},
		{"and r64,r64", x86.inst_and_r64_r64(.RBX, .RAX),   x86.inst_r_r(.AND, x86.RBX, x86.RAX)},
		{"or r64,r64",  x86.inst_or_r64_r64(.RBX, .RAX),    x86.inst_r_r(.OR, x86.RBX, x86.RAX)},
		{"xor r64,r64", x86.inst_xor_r64_r64(.R8, .R8),     x86.inst_r_r(.XOR, x86.R8, x86.R8)},
		{"cmp r64,r64", x86.inst_cmp_r64_r64(.RAX, .RDX),   x86.inst_r_r(.CMP, x86.RAX, x86.RDX)},
		{"add r32,r32", x86.inst_add_r32_r32(.EAX, .ECX),   x86.inst_r_r(.ADD, x86.EAX, x86.ECX)},

		// GPR reg-mem / mem-reg across addressing modes
		{"mov r64,[d8]",  x86.inst_mov_r64_m64(.RDX, x86.Mem64{md8}),  x86.inst_r_m(.MOV, x86.RDX, md8, 8)},
		{"mov r64,[d32]", x86.inst_mov_r64_m64(.RAX, x86.Mem64{md32}), x86.inst_r_m(.MOV, x86.RAX, md32, 8)},
		{"mov r64,[b+i]", x86.inst_mov_r64_m64(.RAX, x86.Mem64{mbi}),  x86.inst_r_m(.MOV, x86.RAX, mbi, 8)},
		{"mov r64,[rip]", x86.inst_mov_r64_m64(.RAX, x86.Mem64{mrip}), x86.inst_r_m(.MOV, x86.RAX, mrip, 8)},
		{"mov [b+i],r64", x86.inst_mov_m64_r64(x86.Mem64{mbi}, .R9),   x86.inst_m_r(.MOV, mbi, 8, x86.R9)},
		{"add r64,[d8]",  x86.inst_add_r64_m64(.RAX, x86.Mem64{md8}),  x86.inst_r_m(.ADD, x86.RAX, md8, 8)},

		// SSE (legacy) + VEX vector
		{"movaps x,x",   x86.inst_movaps_xmm_xmm(.XMM0, .XMM1),               x86.inst_r_r(.MOVAPS, x86.XMM0, x86.XMM1)},
		{"movaps x,m",   x86.inst_movaps_xmm_m128(.XMM3, x86.Mem128{mbi}),    x86.inst_r_m(.MOVAPS, x86.XMM3, mbi, 16)},
		{"movaps m,x",   x86.inst_movaps_m128_xmm(x86.Mem128{mbi}, .XMM8),    x86.inst_m_r(.MOVAPS, mbi, 16, x86.XMM8)},
		{"addps x,x",    x86.inst_addps_xmm_xmm(.XMM2, .XMM4),                x86.inst_r_r(.ADDPS, x86.XMM2, x86.XMM4)},
		{"vaddps y,y,y", x86.inst_vaddps_ymm_ymm_ymm(.YMM0, .YMM1, .YMM2),    x86.inst_r_r_r(.VADDPS, x86.YMM0, x86.YMM1, x86.YMM2)},
		{"vaddps y ext", x86.inst_vaddps_ymm_ymm_ymm(.YMM8, .YMM12, .YMM15),  x86.inst_r_r_r(.VADDPS, x86.YMM8, x86.YMM12, x86.YMM15)},
		{"vmulps x,x,x", x86.inst_vmulps_xmm_xmm_xmm(.XMM0, .XMM1, .XMM2),    x86.inst_r_r_r(.VMULPS, x86.XMM0, x86.XMM1, x86.XMM2)},

		// opcode+reg
		{"push r64", x86.inst_push_r64(.R11), x86.inst_r(.PUSH, x86.R11)},
		{"pop r64",  x86.inst_pop_r64(.R12),  x86.inst_r(.POP, x86.R12)},

		// immediate forms (no hint -- value-dependent; must still be correct)
		{"mov r32,imm32", x86.inst_mov_r32_imm32(.EAX, 0x12345678),         x86.inst_r_i(.MOV, x86.EAX, 0x12345678, 4)},
		{"mov r64,imm64", x86.inst_mov_r64_imm64(.RAX, 0x1122334455667788), x86.inst_r_i(.MOV, x86.RAX, 0x1122334455667788, 8)},
	}

	for c in cases {
		tb_check(c.name, c.typed, c.generic)
	}
}
|
||||
|
||||
// =============================================================================
|
||||
// MAIN
|
||||
// =============================================================================
|
||||
@@ -3187,6 +3282,9 @@ main :: proc() {
|
||||
log_header("LABEL_MAP TESTS")
|
||||
run_label_map_tests()
|
||||
|
||||
log_header("TYPED BUILDER CONSISTENCY")
|
||||
run_typed_builder_tests()
|
||||
|
||||
log_header("PERFORMANCE BENCHMARKS")
|
||||
run_benchmarks()
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@ Proc_Entry :: struct {
|
||||
mnemonic: x86.Mnemonic,
|
||||
sig: Operand_Signature,
|
||||
proc_name: string,
|
||||
enc_hint: u16, // biased global form index (idx+1) for the pre-match fast path; 0 = none
|
||||
}
|
||||
|
||||
GEN_ATTRIB :: "// rexcode · Brendan Punsky (dotbmp@github), original author\n\n"
|
||||
@@ -74,10 +75,21 @@ main :: proc() {
|
||||
encodings := x86.ENCODE_FORMS[_run.start:][:_run.count]
|
||||
if len(encodings) == 0 { continue }
|
||||
|
||||
for enc in encodings {
|
||||
for enc, enc_idx in encodings {
|
||||
// Skip encodings we can't generate builders for (implicit-only operands, etc.)
|
||||
can_generate_builder(enc) or_continue
|
||||
|
||||
// A typed builder may bake a pre-matched form hint only when the
|
||||
// matcher's pick is value-INDEPENDENT (no immediate/relative size
|
||||
// selection); otherwise the matcher might choose a shorter form for
|
||||
// some values, so we leave enc_hint=0 (matcher path). The first form
|
||||
// in run order that produces a given proc_name wins the dedup below,
|
||||
// which mirrors the matcher's first-match-in-run-order pick.
|
||||
hint: u16 = 0
|
||||
if form_is_hintable(enc) {
|
||||
hint = u16(int(_run.start) + enc_idx + 1)
|
||||
}
|
||||
|
||||
// For RM operands, generate both register and memory variants
|
||||
variants := get_operand_variants(enc)
|
||||
|
||||
@@ -95,6 +107,7 @@ main :: proc() {
|
||||
mnemonic = mnemonic,
|
||||
sig = sig,
|
||||
proc_name = proc_name,
|
||||
enc_hint = hint,
|
||||
}
|
||||
|
||||
if mnemonic not_in procs_by_mnemonic {
|
||||
@@ -328,6 +341,21 @@ can_generate_builder :: proc(enc: x86.Encoding) -> bool {
|
||||
return !has_any_operand || has_explicit
|
||||
}
|
||||
|
||||
// form_is_hintable reports whether an encoding form may have its index baked
// into a typed builder as a pre-matched enc_hint.
//
// A form qualifies only if none of its operands is an immediate (IMM8, IMM8SX,
// IMM16, IMM32, IMM64) or a relative target (REL8, REL32). For those operand
// kinds the matcher chooses between sizes (imm8 vs imm32, rel8 vs rel32) from
// the runtime value, so it may pick a shorter form than the builder's nominal
// one; baking a hint would then diverge from the matcher (and from llvm-mc).
// Forms that do not qualify keep enc_hint=0 and stay on the matcher path.
form_is_hintable :: proc(enc: x86.Encoding) -> bool {
	for operand in enc.ops {
		is_immediate := operand == .IMM8 || operand == .IMM8SX || operand == .IMM16 || operand == .IMM32 || operand == .IMM64
		is_relative  := operand == .REL8 || operand == .REL32
		if is_immediate || is_relative {
			return false
		}
	}
	return true
}
|
||||
|
||||
// Get all variants for an encoding (expands RM operands into reg and mem variants)
|
||||
get_operand_variants :: proc(enc: x86.Encoding) -> []Operand_Signature {
|
||||
result: [dynamic]Operand_Signature
|
||||
@@ -1366,7 +1394,18 @@ generate_proc :: proc(sb: ^strings.Builder, entry: Proc_Entry, max_name_padding:
|
||||
strings.write_string(sb, " :: #force_inline proc \"contextless\" (")
|
||||
strings.write_string(sb, params)
|
||||
strings.write_string(sb, ") -> Instruction { return ")
|
||||
generate_helper_call(sb, entry)
|
||||
// Build via the typed op_* constructors (op_gpr64/op_xmm/...), which carry
|
||||
// the register CLASS. The older inst_r_r(.., Register(dst), ..) shortcut cast
|
||||
// the hw-only typed enum straight to Register and dropped the class, so every
|
||||
// typed builder produced a class-0 operand the matcher rejected (encode -> empty).
|
||||
if entry.enc_hint != 0 {
|
||||
// Pre-matched form: bake the biased global index so encode() skips the scan.
|
||||
strings.write_string(sb, "with_hint(")
|
||||
generate_fallback_instruction(sb, entry)
|
||||
fmt.sbprintf(sb, ", %d)", entry.enc_hint)
|
||||
} else {
|
||||
generate_fallback_instruction(sb, entry)
|
||||
}
|
||||
strings.write_string(sb, " }\n")
|
||||
}
|
||||
|
||||
@@ -1402,7 +1441,14 @@ generate_emit_proc :: proc(sb: ^strings.Builder, entry: Proc_Entry, max_name_pad
|
||||
}
|
||||
strings.write_string(sb, " :: #force_inline proc(")
|
||||
strings.write_string(sb, params)
|
||||
strings.write_string(sb, ") { ")
|
||||
generate_emit_helper_call(sb, entry)
|
||||
strings.write_string(sb, " }\n")
|
||||
// Reuse the (class-correct, hint-baked) inst_ builder rather than re-emitting
|
||||
// the operands -- keeps emit_ in lockstep with inst_ and inherits the hint.
|
||||
strings.write_string(sb, ") { append(instructions, ")
|
||||
strings.write_string(sb, entry.proc_name)
|
||||
strings.write_string(sb, "(")
|
||||
for i in 0..<sig.count {
|
||||
if i > 0 { strings.write_string(sb, ", ") }
|
||||
strings.write_string(sb, names[i])
|
||||
}
|
||||
strings.write_string(sb, ")) }\n")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user