diff --git a/core/rexcode/isa/x86/decoder.odin b/core/rexcode/isa/x86/decoder.odin index e99bce595..73488bf03 100644 --- a/core/rexcode/isa/x86/decoder.odin +++ b/core/rexcode/isa/x86/decoder.odin @@ -943,7 +943,54 @@ decode_implicit_operand :: proc(op_type: Operand_Type) -> Operand { // ============================================================================= // ----------------------------------------------------------------------------- -// 8.9 Core Decode Procedure +// 8.9 Buffer-Sizing Helpers +// ----------------------------------------------------------------------------- +// +// decode() appends one Instruction + Instruction_Info per decoded instruction +// (and a Label_Definition per inferred branch target) to caller-owned dynamic +// arrays, and never reserves them itself. Growing those arrays from empty costs +// a chain of doubling reallocations -- ~15% of decode time on a cold buffer. +// These helpers let a caller pre-size the arrays once so the hot path avoids +// reallocating; all reserves add to whatever the arrays already hold. + +// Upper bound on the number of instructions in `data` (the shortest x64 +// instruction is one byte). Reserving this guarantees decode() never +// reallocates its output arrays, at the cost of over-allocating for typical code. +@(require_results) +decode_max_instruction_count :: #force_inline proc "contextless" (data: []u8) -> int { + return len(data) +} + +// Typical-case estimate of the instruction count for `data` (~3.x bytes per +// instruction for x64, plus slack), never above the one-byte-per-instruction +// ceiling. Cheaper on memory than that ceiling; dense code may still reallocate. +@(require_results) +decode_estimate_instruction_count :: #force_inline proc "contextless" (data: []u8) -> int { + return min(len(data), len(data) / 3 + 8) +} + +// Pre-size the caller's decode output arrays for the `data` to be decoded so the +// decode hot path avoids reallocating. 
Reserves capacity on top of whatever each +// array already holds; allocates no new buffers (only the caller's arrays grow, +// and only if not already big enough). Any array pointer may be nil to skip it. +// Pass `exact = true` for the guaranteed (over-allocating) ceiling; the default +// uses the typical-case estimate, which dense code can still outgrow. +decode_reserve :: proc( + instructions: ^[dynamic]Instruction, + inst_info: ^[dynamic]Instruction_Info, + label_defs: ^[dynamic]Label_Definition, + data: []u8, + exact: bool = false, +) { + n := exact ? decode_max_instruction_count(data) : decode_estimate_instruction_count(data) + if instructions != nil { reserve(instructions, len(instructions) + n) } + if inst_info != nil { reserve(inst_info, len(inst_info) + n) } + if label_defs != nil { reserve(label_defs, len(label_defs) + n) } +} + +// ============================================================================= +// ----------------------------------------------------------------------------- +// 8.10 Core Decode Procedure // ----------------------------------------------------------------------------- // decode: The single entry point for x64 instruction decoding. @@ -993,6 +1040,9 @@ decode :: proc( data_length := u32(len(data)) + // (Callers that want the hot path to avoid reallocating the output arrays + // pre-size them with decode_reserve(); decode() itself never reserves them.) + // Track branch targets for label inference (resolved in pass 2 by isa). 
pending_branches: [dynamic]isa.Branch_Target defer delete(pending_branches) diff --git a/core/rexcode/isa/x86/encoder.odin b/core/rexcode/isa/x86/encoder.odin index 1230401b5..f73730d92 100644 --- a/core/rexcode/isa/x86/encoder.odin +++ b/core/rexcode/isa/x86/encoder.odin @@ -901,16 +901,46 @@ imm_matches_inline :: #force_inline proc "contextless" (op: ^Operand, op_type: O } // ----------------------------------------------------------------------------- -// SECTION: 7.8 Convenience Functions +// SECTION: 7.8 Buffer-Sizing Helpers // ----------------------------------------------------------------------------- +// +// encode() creates no buffers of its own -- it writes machine code into the caller's `code` +// and appends unresolved Relocations to the caller's dynamic array. These +// helpers let the caller size those buffers from the instruction slice: either +// by querying the plain size (caller manages its own memory) or by pre-sizing the +// caller's own dynamic arrays directly. No new buffers are ever allocated; only the +// caller's arrays are grown, and only when they are not already large enough. +// (The error array grows only on the failure path, so it is left out.) -// Compute safe buffer sizes for encoding -encode_max_code_size :: #force_inline proc "contextless" (n: int) -> int { - return n * MAX_INST_SIZE +// Worst-case number of code bytes encode() can emit for `instructions` +// (the longest x64 instruction is MAX_INST_SIZE bytes). +@(require_results) +encode_max_code_size :: #force_inline proc "contextless" (instructions: []Instruction) -> int { + return len(instructions) * MAX_INST_SIZE } -encode_max_relocation_count :: #force_inline proc "contextless" (n: int) -> int { - return n // At most 1 reloc per instruction +// Worst-case number of relocations encode() can produce for `instructions`. 
+@(require_results) +encode_max_relocation_count :: #force_inline proc "contextless" (instructions: []Instruction) -> int { + return len(instructions) // at most one reloc per instruction +} + +// Pre-size the caller's encode output buffers so the encode hot path never +// reallocates. `code` is the caller's [dynamic]u8 (sliced as `code[:]` when +// calling encode); its LENGTH (not just capacity) is grown to the worst case so +// the slice is a valid emit target. `relocs` only has capacity reserved, on top of +// its current length. Both are optional (nil to skip). Grows only the caller's own +// arrays, and is a no-op for any that are already big enough. +encode_reserve :: proc(code: ^[dynamic]u8, relocs: ^[dynamic]Relocation, instructions: []Instruction) { + if code != nil { + size := encode_max_code_size(instructions) + if len(code) < size { + resize(code, size) + } + } + if relocs != nil { + reserve(relocs, len(relocs) + encode_max_relocation_count(instructions)) + } } // -----------------------------------------------------------------------------