Files
Odin/core/rexcode/x86/tablegen/gen.odin
2026-06-15 13:49:57 +01:00

540 lines
20 KiB
Odin

// rexcode · Brendan Punsky (dotbmp@github), original author
package rexcode_x86_tablegen
// =============================================================================
// x86 TABLE GENERATOR (Stage A)
// =============================================================================
//
// Reads the single-source-of-truth ENCODING_TABLE (encoding_table.odin, in this
// same package) and emits human-readable, type-checked Odin into ./generated/:
//
// generated/encode_tables.odin ENCODE_FORMS + ENCODE_RUNS (flattened encode)
// generated/decode_tables.odin modrm/sib + decode entries + opcode indices
// generated/writer.odin Stage B: serializes those globals to ../tables/*.bin
//
// It also (re)emits the library loader ../tables.odin (subsidiary types + #load).
//
// Run from anywhere:
// odin run core/rexcode/x86/tablegen # Stage A (this program)
// odin run core/rexcode/x86/tablegen/generated # Stage B (writes the blobs)
//
// The generated Odin is the auditable intermediate: the compiler validates it
// before Stage B dumps it to raw bytes, so the blobs can never drift from a
// well-typed table.
import "core:fmt"
import "core:os"
import "core:strings"
import "core:slice"
import "core:reflect"
import lib "../"
// Package-scope aliases so the moved SoT (encoding_table.odin) resolves
// `Mnemonic`/`Encoding` unqualified — its body stays byte-for-byte unedited.
// The PREFIX_* constants are aliased from the library package the same way.
Encoding :: lib.Encoding
Mnemonic :: lib.Mnemonic
PREFIX_66 :: lib.PREFIX_66
PREFIX_F3 :: lib.PREFIX_F3
PREFIX_F2 :: lib.PREFIX_F2
// A single manifest row describing one binary table blob.
// The same rows feed the loader's #load lines and the Stage B writer's dump
// calls, which keeps global names and file names in agreement between the two.
Blob :: struct {
	global: string, // name of the generated global, e.g. "ENCODE_FORMS"
	file:   string, // blob file name placed under tables/
	typ:    string, // element type name used in the loader's `[]typ` slice
}
// The blob manifest: one row per generated table, as
// {global name, blob file name, element type name}.
// emit_writer emits one dump call per row and emit_loader one #load line per row.
@(rodata)
BLOBS := [?]Blob{
{"ENCODE_FORMS", "x86.encode_forms.bin", "Encoding"},
{"ENCODE_RUNS", "x86.encode_runs.bin", "Encode_Run"},
{"MODRM_TABLE", "x86.modrm.bin", "ModRM_Info"},
{"SIB_TABLE", "x86.sib.bin", "SIB_Info"},
{"LEGACY_DECODE_ENTRIES", "x86.legacy.bin", "Decode_Entry"},
{"VEX_DECODE_ENTRIES", "x86.vex.bin", "VEX_Decode_Entry"},
{"EVEX_DECODE_ENTRIES", "x86.evex.bin", "VEX_Decode_Entry"},
{"DECODE_INDEX_LEGACY", "x86.idx_legacy.bin", "Decode_Index"},
{"DECODE_INDEX_ESC_0F", "x86.idx_0f.bin", "Decode_Index"},
{"DECODE_INDEX_ESC_0F38", "x86.idx_0f38.bin", "Decode_Index"},
{"DECODE_INDEX_ESC_0F3A", "x86.idx_0f3a.bin", "Decode_Index"},
{"VEX_INDEX_0F", "x86.vex_idx_0f.bin", "Decode_Index"},
{"VEX_INDEX_0F38", "x86.vex_idx_0f38.bin", "Decode_Index"},
{"VEX_INDEX_0F3A", "x86.vex_idx_0f3a.bin", "Decode_Index"},
{"EVEX_INDEX_0F", "x86.evex_idx_0f.bin", "Decode_Index"},
{"EVEX_INDEX_0F38", "x86.evex_idx_0f38.bin","Decode_Index"},
{"EVEX_INDEX_0F3A", "x86.evex_idx_0f3a.bin","Decode_Index"},
}
// Output directory for the generated Odin sources, anchored at this file's
// directory so the program can be run from any working directory.
DIR_GEN :: #directory + "/generated/"
// Path of the library loader (tables.odin), one directory above this package.
PATH_LOADER :: #directory + "/../tables.odin"
// Stage A entry point: emits the encode tables, the decode tables, the Stage B
// writer and the library loader (in that order), then prints a one-line summary.
main :: proc() {
	emit_encode_tables()
	legacy_count, vex_count, evex_count := emit_decode_tables()
	emit_writer()
	emit_loader()

	form_count := total_forms()
	fmt.printfln(
		"x86 tablegen: %d encode forms, %d legacy / %d vex / %d evex decode entries",
		form_count, legacy_count, vex_count, evex_count,
	)
}
// -----------------------------------------------------------------------------
// Encode side: ENCODE_FORMS (flat) + ENCODE_RUNS (per-mnemonic index)
// -----------------------------------------------------------------------------
// Counts every encoding form in ENCODING_TABLE across all mnemonics.
// The result is the length of the flattened ENCODE_FORMS array.
total_forms :: proc() -> (n: int) {
	for mnemonic in Mnemonic {
		forms := ENCODING_TABLE[mnemonic]
		n += len(forms)
	}
	return n
}
// Emits generated/encode_tables.odin, containing:
//   ENCODE_FORMS  every form of every mnemonic, flattened in enum order
//   ENCODE_RUNS   per-mnemonic {start, count} into ENCODE_FORMS
emit_encode_tables :: proc() {
	sb := strings.builder_make()
	// emit_file copies the text before writing, so the builder can be released
	// as soon as this procedure returns.
	defer strings.builder_destroy(&sb)

	strings.write_string(&sb, "package rexcode_x86_generated\n\n")
	strings.write_string(&sb, "// GENERATED by ../gen.odin -- DO NOT EDIT.\n")
	strings.write_string(&sb, "// Flattened encode forms + per-mnemonic run index (source: ENCODING_TABLE).\n\n")
	strings.write_string(&sb, "import lib \"../..\"\n\n")

	// One pass computes both column widths:
	//   form_name_width  longest name among mnemonics that have at least one form
	//   run_name_width   longest name among all mnemonics
	form_name_width, run_name_width := 0, 0
	for m in Mnemonic {
		name_len := len(reflect.enum_string(m))
		run_name_width = max(run_name_width, name_len)
		if len(ENCODING_TABLE[m]) > 0 {
			form_name_width = max(form_name_width, name_len)
		}
	}

	strings.write_string(&sb, "@(rodata)\n")
	fmt.sbprintfln(&sb, "ENCODE_FORMS := [%d]lib.Encoding{{", total_forms())
	for m in Mnemonic {
		forms := ENCODING_TABLE[m]
		if len(forms) == 0 { continue }
		fmt.sbprintfln(&sb, "\t// .%v", m)
		for f in forms { write_encoding(&sb, f, form_name_width) }
	}
	strings.write_string(&sb, "}\n\n")

	// Run index, one entry per mnemonic (dense, enum-ordinal order).
	strings.write_string(&sb, "@(rodata)\n")
	strings.write_string(&sb, "ENCODE_RUNS := [lib.Mnemonic]lib.Encode_Run{\n")
	start := 0
	for m in Mnemonic {
		n := len(ENCODING_TABLE[m])
		name := reflect.enum_string(m)
		fmt.sbprintf(&sb, "\t.%s", name)
		for _ in 0..<run_name_width-len(name) { strings.write_byte(&sb, ' ') }
		fmt.sbprintfln(&sb, " = {{% 5d, % 3d}},", start, n)
		// Runs are contiguous: the next mnemonic starts where this one ends.
		start += n
	}
	strings.write_string(&sb, "}\n")

	emit_file(DIR_GEN + "encode_tables.odin", &sb)
}
// Writes one lib.Encoding literal as a tab-indented line of ENCODE_FORMS:
//   {.MNEMONIC, {ops...}, {enc...}, 0xOPCODE, ext, {flags...}},
// `max_name` is the column width the mnemonic name is padded to.
write_encoding :: proc(sb: ^strings.Builder, e: lib.Encoding, max_name: int) {
	last_op  := len(e.ops) - 1
	last_enc := len(e.enc) - 1

	strings.write_string(sb, "\t{")
	print_enum_buffered(sb, e.mnemonic, max_name, true)

	// Operand types; the final element gets no trailing comma or padding.
	strings.write_string(sb, "{")
	for operand, idx in e.ops {
		print_enum_buffered(sb, operand, 9, idx < last_op)
	}

	// Operand encodings, same rule for the final element.
	strings.write_string(sb, "}, {")
	for encoding, idx in e.enc {
		print_enum_buffered(sb, encoding, 4, idx < last_enc)
	}
	strings.write_string(sb, "}, ")

	fmt.sbprintf(sb, "0x%02X, %d, ", e.opcode, e.ext)
	write_flags(sb, e.flags)
	strings.write_string(sb, "},\n")
}
// -----------------------------------------------------------------------------
// Decode side
// -----------------------------------------------------------------------------
// Intermediate decode record built from one ENCODING_TABLE form by
// emit_decode_tables. The first four fields are the sort/lookup key used by
// entry_less and find_run; the rest is payload copied from the form.
Collected_Entry :: struct {
// Key: escape, prefix and opcode byte, copied from the form.
esc: lib.Escape,
prefix: u8,
opcode: u8,
// The form's ext when flags.modrm_reg_ext is set, otherwise 0xFF.
ext: u8,
// Payload copied from the form.
mnemonic: lib.Mnemonic,
ops: [4]lib.Operand_Type,
enc: [4]lib.Operand_Encoding,
flags: lib.Encoding_Flags,
// Copies of flags.vex_w / flags.vex_l; gen_entries prints them only for VEX/EVEX tables.
vex_w: lib.VEX_W,
vex_l: lib.VEX_L,
}
// Builds the reverse (decode) tables from ENCODING_TABLE and writes
// generated/decode_tables.odin.
//
// Every encoding form becomes one Collected_Entry, bucketed by vex_type
// (.VEX, .EVEX, everything else -> legacy) and then ordered by
// (esc, prefix, opcode, ext) so each opcode's entries form a contiguous run
// that the index generators locate with find_run.
//
// Returns the number of legacy, VEX and EVEX entries emitted.
emit_decode_tables :: proc() -> (n_legacy, n_vex, n_evex: int) {
	legacy, vex, evex: [dynamic]Collected_Entry
	defer delete(legacy)
	defer delete(vex)
	defer delete(evex)

	for m in Mnemonic {
		for enc in ENCODING_TABLE[m] {
			e := Collected_Entry{
				esc      = enc.flags.esc,
				prefix   = enc.flags.prefix,
				opcode   = enc.opcode,
				// 0xFF marks "no /digit extension" for forms without modrm_reg_ext.
				ext      = enc.flags.modrm_reg_ext ? enc.ext : 0xFF,
				mnemonic = enc.mnemonic,
				ops      = enc.ops,
				enc      = enc.enc,
				flags    = enc.flags,
				vex_w    = enc.flags.vex_w,
				vex_l    = enc.flags.vex_l,
			}
			switch enc.flags.vex_type {
			case .VEX:        append(&vex, e)
			case .EVEX:       append(&evex, e)
			case .NONE, .XOP: append(&legacy, e)
			}
		}
	}

	// Stable sort: forms that share the same (esc, prefix, opcode, ext) key keep
	// their ENCODING_TABLE order, so the emitted order within a run is defined
	// by the source table rather than by the sort algorithm's internals.
	slice.stable_sort_by(legacy[:], entry_less)
	slice.stable_sort_by(vex[:], entry_less)
	slice.stable_sort_by(evex[:], entry_less)

	sb := strings.builder_make()
	// emit_file copies the text before writing, so the builder can be released
	// as soon as this procedure returns.
	defer strings.builder_destroy(&sb)

	strings.write_string(&sb, "package rexcode_x86_generated\n\n")
	strings.write_string(&sb, "// GENERATED by ../gen.odin -- DO NOT EDIT.\n")
	strings.write_string(&sb, "// Reverse decode tables (source: ENCODING_TABLE), keyed (esc, prefix, opcode, ext).\n\n")
	strings.write_string(&sb, "import lib \"../..\"\n\n")

	gen_modrm(&sb)
	gen_sib(&sb)
	gen_entries(&sb, "LEGACY_DECODE_ENTRIES", "lib.Decode_Entry", legacy[:], false)
	gen_entries(&sb, "VEX_DECODE_ENTRIES", "lib.VEX_Decode_Entry", vex[:], true)
	gen_entries(&sb, "EVEX_DECODE_ENTRIES", "lib.VEX_Decode_Entry", evex[:], true)
	gen_legacy_index(&sb, legacy[:])
	gen_vex_index(&sb, vex[:], "VEX")
	gen_vex_index(&sb, evex[:], "EVEX")

	emit_file(DIR_GEN + "decode_tables.odin", &sb)
	// The lengths are read before the deferred deletes run.
	return len(legacy), len(vex), len(evex)
}
// Strict "less than" over decode entries, comparing lexicographically by
// (esc, prefix, opcode, ext). Used as the sort predicate for the decode tables.
entry_less :: proc(a, b: Collected_Entry) -> bool {
	switch {
	case a.esc != b.esc:       return int(a.esc) < int(b.esc)
	case a.prefix != b.prefix: return a.prefix < b.prefix
	case a.opcode != b.opcode: return a.opcode < b.opcode
	}
	return a.ext < b.ext
}
// Writes one `<name> := [N]<typ>{ ... }` rodata array holding `entries` in the
// order given. When `is_vex` is set, each row also carries vex_w and vex_l
// between the ext byte and the mnemonic (the VEX_Decode_Entry layout).
gen_entries :: proc(sb: ^strings.Builder, name, typ: string, entries: []Collected_Entry, is_vex: bool) {
	// Pad mnemonics to the longest name present so the columns line up.
	name_width := 0
	for entry in entries {
		name_width = max(name_width, len(reflect.enum_string(entry.mnemonic)))
	}

	strings.write_string(sb, "@(rodata)\n")
	fmt.sbprintfln(sb, "%s := [%d]%s{{", name, len(entries), typ)

	for entry in entries {
		last_op  := len(entry.ops) - 1
		last_enc := len(entry.enc) - 1

		strings.write_string(sb, "\t{")
		print_enum_buffered(sb, entry.esc, 5, true)
		fmt.sbprintf(sb, "%d, 0x%02X, 0x%02X, ", entry.prefix, entry.opcode, entry.ext)
		if is_vex {
			print_enum_buffered(sb, entry.vex_w, 4, true)
			print_enum_buffered(sb, entry.vex_l, 4, true)
		}
		print_enum_buffered(sb, entry.mnemonic, name_width, true)

		strings.write_string(sb, "{")
		for operand, idx in entry.ops {
			print_enum_buffered(sb, operand, 9, idx < last_op)
		}
		strings.write_string(sb, "}, {")
		for encoding, idx in entry.enc {
			print_enum_buffered(sb, encoding, 4, idx < last_enc)
		}
		strings.write_string(sb, "}, ")

		write_flags(sb, entry.flags)
		strings.write_string(sb, "},\n")
	}

	strings.write_string(sb, "}\n\n")
}
// Locates the first contiguous run of entries matching (esc, prefix, opcode).
// Returns its start index and length, or (0, 0) when nothing matches.
// Only the first run is reported, so callers pass entries sorted by entry_less.
find_run :: proc(entries: []Collected_Entry, esc: lib.Escape, prefix, opcode: u8) -> (start, count: int) {
	matches :: proc(e: Collected_Entry, esc: lib.Escape, prefix, opcode: u8) -> bool {
		return e.esc == esc && e.prefix == prefix && e.opcode == opcode
	}

	// Skip ahead to the first matching entry.
	first := 0
	for first < len(entries) && !matches(entries[first], esc, prefix, opcode) {
		first += 1
	}
	if first == len(entries) {
		return // no match: named results stay zero
	}

	// Extend the run until the key changes or the slice ends.
	end := first + 1
	for end < len(entries) && matches(entries[end], esc, prefix, opcode) {
		end += 1
	}
	return first, end - first
}
// Label used in generated comments for a prefix index:
// 0 -> "none", 1 -> "66", 2 -> "F3", 3 -> "F2"; any other value -> "?".
prefix_name :: proc(p: int) -> string {
	names := [4]string{"none", "66", "F3", "F2"}
	if p < 0 || p >= len(names) {
		return "?"
	}
	return names[p]
}
// Writes the four legacy opcode index tables, one per lib.Escape value.
// Each is a [4][256]lib.Decode_Index indexed [prefix][opcode]; only opcodes
// that have at least one entry are written, as `0xNN = {start, count}`.
gen_legacy_index :: proc(sb: ^strings.Builder, entries: []Collected_Entry) {
	for esc in lib.Escape {
		table_name: string
		switch esc {
		case .NONE:  table_name = "DECODE_INDEX_LEGACY"
		case ._0F:   table_name = "DECODE_INDEX_ESC_0F"
		case ._0F38: table_name = "DECODE_INDEX_ESC_0F38"
		case ._0F3A: table_name = "DECODE_INDEX_ESC_0F3A"
		}

		strings.write_string(sb, "@(rodata)\n")
		fmt.sbprintfln(sb, "%s := [4][256]lib.Decode_Index{{", table_name)

		for prefix in 0..<4 {
			fmt.sbprintfln(sb, "\t{{ // prefix = %s", prefix_name(prefix))
			for opcode in 0..<256 {
				run_start, run_len := find_run(entries, esc, u8(prefix), u8(opcode))
				if run_len == 0 {
					continue // opcode unused under this prefix: leave the slot zeroed
				}
				fmt.sbprintfln(sb, "\t\t0x%02X = {{% 4d, % 2d}},", opcode, run_start, run_len)
			}
			strings.write_string(sb, "\t},\n")
		}

		strings.write_string(sb, "}\n\n")
	}
}
// Writes the three `<kind>_INDEX_<esc>` tables (escapes 0F, 0F38, 0F3A) for a
// VEX or EVEX entry list. Each is a [4][256]lib.Decode_Index indexed
// [prefix][opcode]. A prefix row with no entries is written on a single line.
gen_vex_index :: proc(sb: ^strings.Builder, entries: []Collected_Entry, kind: string) {
	escapes      := [3]lib.Escape{._0F, ._0F38, ._0F3A}
	escape_names := [3]string{"0F", "0F38", "0F3A"}

	for esc, esc_idx in escapes {
		strings.write_string(sb, "@(rodata)\n")
		fmt.sbprintfln(sb, "%s_INDEX_%s := [4][256]lib.Decode_Index{{", kind, escape_names[esc_idx])

		for prefix in 0..<4 {
			fmt.sbprintf(sb, "\t%d = {{ /* prefix = %s */", prefix, prefix_name(prefix))

			// The line break after the row header is written lazily, just before
			// the first entry, so an empty row collapses to `N = { /* ... */ },`.
			wrote_entry := false
			for opcode in 0..<256 {
				run_start, run_len := find_run(entries, esc, u8(prefix), u8(opcode))
				if run_len == 0 {
					continue
				}
				if !wrote_entry {
					strings.write_string(sb, "\n")
					wrote_entry = true
				}
				fmt.sbprintfln(sb, "\t\t0x%02X = {{% 3d, % 2d}},", opcode, run_start, run_len)
			}

			if wrote_entry {
				strings.write_string(sb, "\t},\n")
			} else {
				strings.write_string(sb, " },\n")
			}
		}

		strings.write_string(sb, "}\n\n")
	}
}
// Writes MODRM_TABLE: for each of the 256 ModR/M byte values, the fields
// {mod, reg, rm, has_sib, disp_size}, four entries per generated line.
gen_modrm :: proc(sb: ^strings.Builder) {
	strings.write_string(sb, "@(rodata)\n")
	strings.write_string(sb, "MODRM_TABLE := [256]lib.ModRM_Info{\n")

	for value in 0..<256 {
		byte_val := u8(value)
		mod_bits := (byte_val >> 6) & 0x3
		reg_bits := (byte_val >> 3) & 0x7
		rm_bits  := byte_val & 0x7

		// rm == 4 selects a SIB byte, except in register-direct mode (mod == 3).
		sib_text := "false"
		if rm_bits == 4 && mod_bits != 3 {
			sib_text = " true"
		}

		// Displacement width in bytes: 4 for mod=0/rm=5 and mod=2, 1 for mod=1.
		disp: u8
		switch {
		case mod_bits == 1:                 disp = 1
		case mod_bits == 2:                 disp = 4
		case mod_bits == 0 && rm_bits == 5: disp = 4
		}

		column := value % 4
		if column == 0 { strings.write_string(sb, "\t") }
		fmt.sbprintf(sb, "{{%d, %d, %d, %s, %d}}, ", mod_bits, reg_bits, rm_bits, sib_text, disp)
		if column == 3 { strings.write_string(sb, "\n") }
	}

	strings.write_string(sb, "}\n\n")
}
// Writes SIB_TABLE: for each of the 256 SIB byte values, {scale, index, base},
// four entries per generated line. scale is 1 << (top two bits); an index
// field of 4 is stored as 0xFF.
gen_sib :: proc(sb: ^strings.Builder) {
	strings.write_string(sb, "@(rodata)\n")
	strings.write_string(sb, "SIB_TABLE := [256]lib.SIB_Info{\n")

	for value in 0..<256 {
		sib_byte    := u8(value)
		scale: u8    = 1 << ((sib_byte >> 6) & 0x3)
		index_field := (sib_byte >> 3) & 0x7
		base_field  := sib_byte & 0x7

		// Index encoding 4 is replaced by the 0xFF sentinel in the table.
		if index_field == 4 {
			index_field = 0xFF
		}

		fmt.sbprintf(sb, "\t{{%d, % 3d, %d}},", scale, index_field, base_field)
		if value % 4 == 3 {
			strings.write_string(sb, "\n")
		}
	}

	strings.write_string(sb, "}\n\n")
}
// -----------------------------------------------------------------------------
// Shared formatting helpers (ported from the original decode generator)
// -----------------------------------------------------------------------------
// Writes the enum value `x` as `.NAME`. When `comma` is true it is followed by
// ", " and then spaces that pad the name out to `max_name` characters, so the
// next column lines up. When `comma` is false nothing follows the name.
print_enum_buffered :: proc(sb: ^strings.Builder, x: $T, max_name: int, comma: bool) {
	fmt.sbprintf(sb, ".%v", x)
	if !comma {
		return
	}
	strings.write_string(sb, ", ")

	// A name longer than max_name gives a negative pad and therefore no spaces.
	pad := max_name - len(reflect.enum_string(x))
	for _ in 0..<pad {
		strings.write_byte(sb, ' ')
	}
}
// Complete Encoding_Flags emitter -- every field, so ENCODE_FORMS round-trips
// the SoT exactly (mode_32_only is read by the encoder).
// Writes `flags` as a compound literal `{name=value, ...}` listing only the
// fields that differ from their defaults (esc .NONE, prefix 0, vex_type .NONE,
// vex_w .WIG, vex_l .LIG, booleans false). All-default flags produce `{}`.
//
// Fields are written straight into `sb`; no intermediate strings are allocated.
write_flags :: proc(sb: ^strings.Builder, flags: lib.Encoding_Flags) {
	// Starts a new field: writes the ", " separator (except before the first
	// field) followed by `text`.
	field :: proc(sb: ^strings.Builder, wrote_any: ^bool, text: string) {
		if wrote_any^ { strings.write_string(sb, ", ") }
		wrote_any^ = true
		strings.write_string(sb, text)
	}

	wrote_any := false
	strings.write_string(sb, "{")

	if flags.esc != .NONE {
		field(sb, &wrote_any, "esc=.")
		fmt.sbprintf(sb, "%v", flags.esc)
	}
	if flags.prefix != 0 {
		field(sb, &wrote_any, "prefix=")
		fmt.sbprintf(sb, "%d", flags.prefix)
	}
	if flags.vex_type != .NONE {
		field(sb, &wrote_any, "vex_type=.")
		fmt.sbprintf(sb, "%v", flags.vex_type)
	}
	if flags.vex_w != .WIG {
		field(sb, &wrote_any, "vex_w=.")
		fmt.sbprintf(sb, "%v", flags.vex_w)
	}
	if flags.vex_l != .LIG {
		field(sb, &wrote_any, "vex_l=.")
		fmt.sbprintf(sb, "%v", flags.vex_l)
	}
	if flags.default_64    { field(sb, &wrote_any, "default_64=true") }
	if flags.force_rex_w   { field(sb, &wrote_any, "force_rex_w=true") }
	if flags.no_rex        { field(sb, &wrote_any, "no_rex=true") }
	if flags.lock_ok       { field(sb, &wrote_any, "lock_ok=true") }
	if flags.rep_ok        { field(sb, &wrote_any, "rep_ok=true") }
	if flags.modrm_reg_ext { field(sb, &wrote_any, "modrm_reg_ext=true") }
	if flags.mode_32_only  { field(sb, &wrote_any, "mode_32_only=true") }

	strings.write_string(sb, "}")
}
// -----------------------------------------------------------------------------
// Stage B writer + the library loader
// -----------------------------------------------------------------------------
// Emits generated/writer.odin, the Stage B program. Its main() dumps each
// global listed in BLOBS as raw bytes (size_of the whole global) into the
// file of the same manifest row under ../../tables/.
emit_writer :: proc() {
	sb := strings.builder_make()
	// emit_file copies the text before writing, so the builder can be released
	// as soon as this procedure returns.
	defer strings.builder_destroy(&sb)

	strings.write_string(&sb, "package rexcode_x86_generated\n\n")
	strings.write_string(&sb, "// GENERATED by ../gen.odin -- DO NOT EDIT.\n")
	strings.write_string(&sb, "// Stage B: serialize the typed tables above to raw blobs under ../../tables/.\n\n")
	strings.write_string(&sb, "import \"core:os\"\nimport \"core:fmt\"\n\n")
	strings.write_string(&sb, "TABLES :: #directory + \"/../../tables/\"\n\n")

	// Helper: view any object as a byte slice.
	strings.write_string(&sb, "raw :: #force_inline proc \"contextless\" (p: rawptr, n: int) -> []u8 {\n")
	strings.write_string(&sb, "\treturn (cast([^]u8)p)[:n]\n}\n\n")

	// Helper: write one blob, aborting on failure. These lines are written
	// verbatim, so the %s/%v below are format verbs of the generated program.
	strings.write_string(&sb, "w :: proc(file: string, data: []u8) {\n")
	strings.write_string(&sb, "\tif err := os.write_entire_file(file, data); err != nil {\n")
	strings.write_string(&sb, "\t\tfmt.eprintfln(\"rexcode tablegen: failed to write %s: %v\", file, err)\n")
	strings.write_string(&sb, "\t\tos.exit(1)\n\t}\n}\n\n")

	// One dump call per manifest row.
	strings.write_string(&sb, "main :: proc() {\n")
	for b in BLOBS {
		fmt.sbprintfln(&sb, "\tw(TABLES + \"%s\", raw(&%s, size_of(%s)))", b.file, b.global, b.global)
	}
	strings.write_string(&sb, "}\n")

	emit_file(DIR_GEN + "writer.odin", &sb)
}
// Odin source text that emit_loader writes verbatim into the loader file
// (PATH_LOADER). It declares the element types named in the BLOBS manifest
// other than Encoding: Encode_Run, ModRM_Info, SIB_Info, Decode_Entry,
// VEX_Decode_Entry and Decode_Index.
LOADER_TYPES :: `// -----------------------------------------------------------------------------
// Subsidiary table types (generated scaffolding)
// -----------------------------------------------------------------------------
// Companion run index: ENCODE_RUNS[mnemonic] -> contiguous run in ENCODE_FORMS.
Encode_Run :: struct {
start: u32, // start index in ENCODE_FORMS
count: u32, // number of forms for this mnemonic
}
// Precomputed extraction of mod, reg, rm fields from a ModR/M byte.
ModRM_Info :: struct #packed {
mod: u8,
reg: u8,
rm: u8,
has_sib: bool,
disp_size: u8,
}
// Precomputed extraction of scale, index, base fields from a SIB byte.
SIB_Info :: struct #packed {
scale: u8,
index: u8,
base: u8,
}
// Information needed to decode an instruction given its opcode bytes.
Decode_Entry :: struct {
esc: Escape,
prefix: u8,
opcode: u8,
ext: u8,
mnemonic: Mnemonic,
ops: [4]Operand_Type,
enc: [4]Operand_Encoding,
flags: Encoding_Flags,
}
VEX_Decode_Entry :: struct {
esc: Escape,
prefix: u8,
opcode: u8,
ext: u8,
vex_w: VEX_W,
vex_l: VEX_L,
mnemonic: Mnemonic,
ops: [4]Operand_Type,
enc: [4]Operand_Encoding,
flags: Encoding_Flags,
}
// (start, count) into a *_DECODE_ENTRIES array. Index tables are stored flat:
// a logical [4][256] is loaded as [1024]; address with ` + "`didx`" + `.
Decode_Index :: struct {
start: u16,
count: u8,
}
`
// Odin source text that emit_loader writes verbatim at the end of the loader
// file, after the #load lines: the `encoding_forms` and `didx` accessors over
// the loaded tables.
LOADER_ACCESSORS :: `// -----------------------------------------------------------------------------
// Accessors
// -----------------------------------------------------------------------------
// Per-mnemonic encode forms: the run of ENCODE_FORMS belonging to ` + "`m`" + `.
// Replaces the old ENCODING_TABLE[m] slice; the returned view is into rodata.
@(private, require_results)
encoding_forms :: #force_inline proc "contextless" (m: Mnemonic) -> []Encoding {
r := ENCODE_RUNS[u16(m)]
return ENCODE_FORMS[r.start:][:r.count]
}
// Flat [prefix][opcode] lookup into a logical [4][256] index table.
@(private, require_results)
didx :: #force_inline proc "contextless" (t: []Decode_Index, prefix, opcode: u8) -> Decode_Index {
return t[(int(prefix) << 8) | int(opcode)]
}
`
// Emits the library loader (PATH_LOADER): a header comment, the subsidiary
// table types (LOADER_TYPES), one `@(rodata) NAME := #load("tables/FILE", []TYPE)`
// line per BLOBS row with aligned columns, and the accessors (LOADER_ACCESSORS).
emit_loader :: proc() {
	sb := strings.builder_make()
	// emit_file copies the text before writing, so the builder can be released
	// as soon as this procedure returns.
	defer strings.builder_destroy(&sb)

	strings.write_string(&sb, "package rexcode_x86\n\n")
	strings.write_string(&sb, "// =============================================================================\n")
	strings.write_string(&sb, "// GENERATED FILE - DO NOT EDIT\n")
	strings.write_string(&sb, "// =============================================================================\n")
	strings.write_string(&sb, "//\n")
	strings.write_string(&sb, "// Loads the flat binary encode/decode tables into @(rodata) and exposes the\n")
	strings.write_string(&sb, "// accessors the encoder/decoder drive. Produced by tablegen:\n")
	strings.write_string(&sb, "//\n")
	strings.write_string(&sb, "// odin run tablegen # Stage A: ENCODING_TABLE -> generated/ + this file\n")
	strings.write_string(&sb, "// odin run tablegen/generated # Stage B: typed Odin literals -> tables/*.bin\n")
	strings.write_string(&sb, "//\n")
	strings.write_string(&sb, "// The .bin blobs are raw, host-endian, packed struct images.\n\n")
	strings.write_string(&sb, LOADER_TYPES)
	strings.write_string(&sb, "\n// -----------------------------------------------------------------------------\n")
	strings.write_string(&sb, "// Loaded tables (rodata, embedded from tables/*.bin at compile time)\n")
	strings.write_string(&sb, "// -----------------------------------------------------------------------------\n\n")

	// Column widths for aligning the global names and the file names.
	gmax, fmax := 0, 0
	for b in BLOBS {
		gmax = max(gmax, len(b.global))
		fmax = max(fmax, len(b.file))
	}

	for b in BLOBS {
		fmt.sbprintf(&sb, "@(rodata) %s", b.global)
		for _ in 0..<gmax-len(b.global) { strings.write_byte(&sb, ' ') }
		// The quoted path and its comma go straight into the builder; the
		// padding follows the comma so the type column lines up.
		fmt.sbprintf(&sb, " := #load(\"tables/%s\",", b.file)
		for _ in 0..<fmax-len(b.file) { strings.write_byte(&sb, ' ') }
		fmt.sbprintfln(&sb, " []%s)", b.typ)
	}
	strings.write_string(&sb, "\n")
	strings.write_string(&sb, LOADER_ACCESSORS)

	emit_file(PATH_LOADER, &sb)
}
// Attribution header that emit_file prepends to every file it writes.
GEN_ATTRIB :: "// rexcode · Brendan Punsky (dotbmp@github), original author\n\n"
// Writes GEN_ATTRIB followed by the builder's contents to `path`.
// Any failure (allocation or I/O) is reported on stderr and the process exits
// with status 1. The builder itself is not modified or freed.
emit_file :: proc(path: string, sb: ^strings.Builder) {
	contents, alloc_err := strings.concatenate({GEN_ATTRIB, strings.to_string(sb^)})
	if alloc_err != nil {
		// Without this check a failed allocation would yield an empty string
		// and silently overwrite the target with an empty file.
		fmt.eprintfln("rexcode tablegen: failed to build contents of %s: %v", path, alloc_err)
		os.exit(1)
	}
	// The concatenated buffer belongs to this procedure; release it after the write.
	defer delete(contents)

	if err := os.write_entire_file(path, transmute([]u8)contents); err != nil {
		fmt.eprintfln("rexcode tablegen: failed to write %s: %v", path, err)
		os.exit(1)
	}
}