Files
Odin/core/rexcode/ppc/tablegen/gen.odin
Flāvius a4f08f8307 Load rexcode encode/decode tables from committed binary blobs
Each ISA's hand-written ENCODING_TABLE (the single source of truth) now lives
in a per-arch tablegen/ metaprogram that flattens it and serializes committed
binary blobs; the library #loads those into @(rodata) at compile time rather
than compiling a table body. No arch keeps encoding_table.odin or
decoding_tables.odin -- only a generated tables.odin loader and tables/*.bin.

* Two-stage, type-checked pipeline: tablegen Stage A emits human-readable
  generated Odin, which compiles and serializes the blobs in Stage B.
* encode() goes through encoding_forms(m); decoders are unchanged apart from
  x86's flattened 2-D index. Decode tables are byte-identical to the old ones.
* build.lua: a LuaJIT driver for the metaprograms, validations, and tests,
  with cross-platform gating and a clear report.
* Docs refreshed; the obsolete forward-looking plan in cross_arch_design.md
  trimmed to what was actually built.
* Attribution headers added to all rexcode source files; the generators emit
  them so generated files keep them.
2026-06-15 07:43:29 -04:00

448 lines
17 KiB
Odin

// rexcode · Brendan Punsky (dotbmp@github), original author
package rexcode_ppc_tablegen
// =============================================================================
// POWERPC TABLE GENERATOR (Stage A)
// =============================================================================
//
// Reads the single-source-of-truth ENCODING_TABLE (encoding_table.odin, this
// package) and emits human-readable, type-checked Odin into ./generated/:
//
//   generated/encode_tables.odin -- ENCODE_FORMS + ENCODE_RUNS + PREFIX_BITS_TABLE
//   generated/decode_tables.odin -- DECODE_ENTRIES + DECODE_FORM_IDX +
//                                   DECODE_BUCKET_LIST + primary/sub index tables
//   generated/writer.odin        -- Stage B: serialize those globals to ../../tables/*.bin
//
// It also re-emits the library loader ../tables.odin. Run:
// odin run ppc/tablegen # Stage A
// odin run ppc/tablegen/generated # Stage B
//
// PowerPC dispatch is two-level (one DECODE_ENTRIES array, two index tables):
// Primary (64 buckets): key = bits[26:31] (6-bit primary opcode).
// Secondary (16384): key = primary*256 + bits[1:8] (low XO bits, skip Rc).
// Each bucket points to a contiguous run in DECODE_BUCKET_LIST of entry indices,
// sorted by mask popcount descending so the most-specific match wins first.
import "core:fmt"
import "core:os"
import "core:strings"
import "core:slice"
import "core:reflect"
import "core:math/bits"
import lib "../"
// Package-scope aliases so the source-of-truth ENCODING_TABLE (which lives in
// this package) can refer to Mnemonic/Encoding without the `lib.` qualifier.
Encoding :: lib.Encoding
Mnemonic :: lib.Mnemonic
// Blob describes one serialized table.
Blob :: struct {
	global: string, // name of the generated global (Stage B reads it, the loader declares it)
	file:   string, // blob file name, relative to the tables/ directory
	typ:    string, // element type used in the loader's `#load(..., []typ)`
}

// BLOBS lists every table that Stage B writes and that the library loader
// embeds. emit_writer and emit_loader both iterate it in this order.
BLOBS := [?]Blob{
	{global = "ENCODE_FORMS",         file = "ppc.encode_forms.bin", typ = "Encoding"},
	{global = "ENCODE_RUNS",          file = "ppc.encode_runs.bin",  typ = "Encode_Run"},
	{global = "PREFIX_BITS_TABLE",    file = "ppc.prefix_bits.bin",  typ = "u32"},
	{global = "DECODE_ENTRIES",       file = "ppc.entries.bin",      typ = "Decode_Entry"},
	{global = "DECODE_FORM_IDX",      file = "ppc.form_idx.bin",     typ = "u16"},
	{global = "DECODE_BUCKET_LIST",   file = "ppc.bucket_list.bin",  typ = "u16"},
	{global = "DECODE_INDEX_PRIMARY", file = "ppc.idx_primary.bin",  typ = "Decode_Index"},
	{global = "DECODE_INDEX_SUB",     file = "ppc.idx_sub.bin",      typ = "Decode_Index"},
}
// Output locations, resolved relative to this source file's directory.
DIR_GEN     :: #directory + "/generated/"
PATH_LOADER :: #directory + "/../tables.odin"

// Dispatch geometry.
PRIMARY_BUCKETS :: 64                          // one bucket per 6-bit primary opcode, bits[26:31] of the word
SUB_BITS        :: 8                           // width of the sub key, bits[1:8] of the word (low XO bits, skipping Rc)
SUB_BUCKETS     :: PRIMARY_BUCKETS << SUB_BITS // 64 * 256 = 16384
// Entry is the generator's working record for one encoding form while the
// decode tables are built: the fields copied from an ENCODING_TABLE row, plus
// the form's position within its mnemonic's form list.
Entry :: struct {
mnemonic: lib.Mnemonic,
ops: [4]lib.Operand_Type,
enc: [4]lib.Operand_Encoding,
bits: u32, // instruction bits; only the positions set in `mask` are fixed
mask: u32, // 1 = bit is fixed by this form, 0 = bit varies (see enumerate_keys)
feature: lib.Feature,
mode: lib.Mode,
flags: lib.Encoding_Flags,
form_idx: u16, // index of this form within ENCODING_TABLE[mnemonic]
}
// Range is one bucket's run inside the flat dispatch list.
Range :: struct {
	start: u32, // offset of the run's first element in the bucket list
	count: u16, // number of elements in the run (0 = empty bucket)
}

// Pair associates a bucket key with the index of an entry that can match it.
Pair :: struct {
	bucket:    u32,
	entry_idx: u16,
}
// Stage A entry point: emit both generated table files, the Stage B writer and
// the library loader, then print a one-line summary.
main :: proc() {
	encode_form_count  := emit_encode_tables()
	decode_entry_count := emit_decode_tables()

	emit_writer()
	emit_loader()

	fmt.printfln("ppc tablegen: %d encode forms, %d decode entries", encode_form_count, decode_entry_count)
}
// -----------------------------------------------------------------------------
// Encode side
// -----------------------------------------------------------------------------
// emit_encode_tables writes generated/encode_tables.odin and returns the total
// number of encode forms across all mnemonics.
//
// The file contains three globals:
//   ENCODE_FORMS      - every form of every mnemonic, flattened in mnemonic order
//   ENCODE_RUNS       - per mnemonic, the {start, count} run inside ENCODE_FORMS
//   PREFIX_BITS_TABLE - per mnemonic ordinal, the prefix word (0 when absent)
emit_encode_tables :: proc() -> (total: int) {
	sb := strings.builder_make()
	// Release the builder's buffer once the file has been written.
	defer strings.builder_destroy(&sb)

	strings.write_string(&sb, "package rexcode_ppc_generated\n\n")
	strings.write_string(&sb, "// GENERATED by ../gen.odin -- DO NOT EDIT.\n")
	strings.write_string(&sb, "// Flattened encode forms + per-mnemonic run index + prefix words (source: ENCODING_TABLE).\n\n")
	strings.write_string(&sb, "import lib \"../..\"\n\n")

	// The array length has to be known before the literal is opened.
	for m in Mnemonic { total += len(ENCODING_TABLE[m]) }

	fmt.sbprintfln(&sb, "ENCODE_FORMS := [%d]lib.Encoding{{", total)
	for m in Mnemonic {
		forms := ENCODING_TABLE[m]
		if len(forms) == 0 { continue }
		fmt.sbprintfln(&sb, "\t// .%v", m)
		for f in forms {
			write_row(&sb, f.mnemonic, f.ops, f.enc, f.bits, f.mask, f.feature, f.mode, f.flags)
		}
	}
	strings.write_string(&sb, "}\n\n")

	// Widest mnemonic name, used to column-align the `=` signs below.
	run_w := 0
	for m in Mnemonic { run_w = max(run_w, len(reflect.enum_string(m))) }

	strings.write_string(&sb, "ENCODE_RUNS := [lib.Mnemonic]lib.Encode_Run{\n")
	start := 0
	for m in Mnemonic {
		c := len(ENCODING_TABLE[m])
		name := reflect.enum_string(m)
		fmt.sbprintf(&sb, "\t.%s", name)
		for _ in 0..<run_w-len(name) { strings.write_byte(&sb, ' ') }
		fmt.sbprintfln(&sb, " = {{% 5d, % 3d}},", start, c)
		// Runs are laid out back to back in the same order as ENCODE_FORMS.
		start += c
	}
	strings.write_string(&sb, "}\n\n")

	// PREFIX_BITS_TABLE: dense [len(Mnemonic)]u32, indexed by mnemonic ordinal.
	// (Non-prefixed mnemonics are 0; the library reads it as PREFIX_BITS_TABLE[u16(mn)].)
	nmn := len(Mnemonic)
	fmt.sbprintfln(&sb, "PREFIX_BITS_TABLE := [%d]u32{{", nmn)
	for m in Mnemonic {
		v := PREFIX_BITS_TABLE[m]
		// Only non-zero slots are written; the rest of the literal defaults to 0.
		if v != 0 {
			fmt.sbprintfln(&sb, "\t%d = 0x%08X, // .%v", u16(m), v, m)
		}
	}
	strings.write_string(&sb, "}\n")

	emit_file(DIR_GEN + "encode_tables.odin", &sb)
	return
}
// -----------------------------------------------------------------------------
// Decode side (ported from the old tools/gen_decode_tables.odin)
// -----------------------------------------------------------------------------
// emit_decode_tables builds the reverse (decode) tables from ENCODING_TABLE,
// writes them to generated/decode_tables.odin, and returns the number of decode
// entries.
//
// Steps:
//   1. Flatten every form of every mnemonic into one Entry list.
//   2. Sort it by primary opcode, then mask popcount descending, then mnemonic.
//   3. For each entry, enumerate every primary bucket and every (primary, sub)
//      bucket it can match.
//   4. Order the pairs per bucket and flatten them into one u16 list, recording
//      each bucket's {start, count} run.
//   5. Emit the typed Odin literals.
//
// Exits the process with status 1 if there are more entries than a u16 index
// can address.
emit_decode_tables :: proc() -> (total: int) {
	all: [dynamic]Entry
	defer delete(all)
	for mn in Mnemonic {
		for f, fi in ENCODING_TABLE[mn] {
			append(&all, Entry{
				mnemonic = mn,
				ops      = f.ops,
				enc      = f.enc,
				bits     = f.bits,
				mask     = f.mask,
				feature  = f.feature,
				mode     = f.mode,
				flags    = f.flags,
				form_idx = u16(fi),
			})
		}
	}

	// Entry indices (Pair.entry_idx, DECODE_BUCKET_LIST) and per-bucket counts
	// (Range.count) are u16. Refuse to generate silently truncated tables.
	if len(all) > int(max(u16)) {
		fmt.eprintfln("rexcode tablegen: %d decode entries exceed the u16 index range", len(all))
		os.exit(1)
	}

	// Sort the global entries array: by primary opcode, then mask popcount
	// descending so within-bucket scan picks the most specific first.
	slice.sort_by(all[:], proc(x, y: Entry) -> bool {
		px := (x.bits >> 26) & 0x3F
		py := (y.bits >> 26) & 0x3F
		if px != py { return px < py }
		xc := bits.count_ones(x.mask)
		yc := bits.count_ones(y.mask)
		if xc != yc { return xc > yc }
		return u16(x.mnemonic) < u16(y.mnemonic)
	})

	// For each entry, enumerate the (primary, sub-key) pairs it can match.
	primary_pairs: [dynamic]Pair
	sub_pairs:     [dynamic]Pair
	defer delete(primary_pairs)
	defer delete(sub_pairs)

	// Scratch key lists, reused across entries (enumerate_keys clears its output).
	prim_keys: [dynamic]u32
	sub_keys:  [dynamic]u32
	defer delete(prim_keys)
	defer delete(sub_keys)

	for e, i in all {
		// Primary key: bits[26:31].
		enumerate_keys(e.bits, e.mask, 26, 6, &prim_keys)
		for pk in prim_keys {
			append(&primary_pairs, Pair{bucket = pk, entry_idx = u16(i)})
		}

		// Sub key: primary << SUB_BITS | bits[1..SUB_BITS+1)
		enumerate_keys(e.bits, e.mask, 1, SUB_BITS, &sub_keys)
		for pk in prim_keys {
			for sk in sub_keys {
				key := pk * (1 << SUB_BITS) + sk
				append(&sub_pairs, Pair{bucket = key, entry_idx = u16(i)})
			}
		}
	}

	// Re-sort pair lists: bucket order, then mask popcount descending, then
	// mnemonic. All three are packed into one u64 sort key.
	rebuild :: proc(pairs: ^[dynamic]Pair, all: []Entry) {
		Sort_Pair :: struct { sort_key: u64, entry_idx: u16, bucket: u32 }
		sortable := make([dynamic]Sort_Pair, 0, len(pairs), context.temp_allocator)
		for pp in pairs^ {
			e := all[pp.entry_idx]
			pop := u64(bits.count_ones(e.mask))
			// (bucket << 40) | ((63 - pop) << 32) | mnemonic
			// `63 - pop` inverts the popcount so a larger popcount sorts first.
			key := (u64(pp.bucket) << 40) | ((63 - pop) << 32) | u64(e.mnemonic)
			append(&sortable, Sort_Pair{sort_key = key, entry_idx = pp.entry_idx, bucket = pp.bucket})
		}
		slice.sort_by_key(sortable[:], proc(s: Sort_Pair) -> u64 { return s.sort_key })
		clear(pairs)
		for s in sortable { append(pairs, Pair{bucket = s.bucket, entry_idx = s.entry_idx}) }
	}
	rebuild(&primary_pairs, all[:])
	rebuild(&sub_pairs, all[:])

	// Build flat u16 dispatch list. Each bucket points to a contiguous run.
	primary_idx: [PRIMARY_BUCKETS]Range
	sub_idx:     [SUB_BUCKETS]Range
	bucket_list: [dynamic]u16
	defer delete(bucket_list)

	// Appends the entry indices of `pairs` (already grouped by bucket) to `list`
	// and records each bucket's run in `idx`.
	emit_pairs :: proc(pairs: []Pair, idx: []Range, list: ^[dynamic]u16) {
		prev_bucket: i64 = -1
		for pp in pairs {
			cur := i64(pp.bucket)
			if cur != prev_bucket {
				// First pair of a new bucket: its run starts at the current list end.
				idx[cur].start = u32(len(list))
				idx[cur].count = 0
				prev_bucket = cur
			}
			append(list, pp.entry_idx)
			idx[cur].count += 1
		}
	}
	// Primary runs come first; sub runs follow in the same list.
	emit_pairs(primary_pairs[:], primary_idx[:], &bucket_list)
	emit_pairs(sub_pairs[:], sub_idx[:], &bucket_list)

	sb := strings.builder_make()
	// Release the builder's buffer once the file has been written.
	defer strings.builder_destroy(&sb)

	strings.write_string(&sb, "package rexcode_ppc_generated\n\n")
	strings.write_string(&sb, "// GENERATED by ../gen.odin -- DO NOT EDIT.\n")
	strings.write_string(&sb, "// Reverse decode tables (source: ENCODING_TABLE), two-level primary+sub dispatch.\n\n")
	strings.write_string(&sb, "import lib \"../..\"\n\n")

	fmt.sbprintfln(&sb, "DECODE_ENTRIES := [%d]lib.Decode_Entry{{", len(all))
	for e in all {
		write_row(&sb, e.mnemonic, e.ops, e.enc, e.bits, e.mask, e.feature, e.mode, e.flags)
	}
	strings.write_string(&sb, "}\n\n")

	emit_form_idx(&sb, all[:])
	emit_bucket_list(&sb, bucket_list[:])
	emit_range_table(&sb, "DECODE_INDEX_PRIMARY", primary_idx[:])
	emit_range_table(&sb, "DECODE_INDEX_SUB", sub_idx[:])

	emit_file(DIR_GEN + "decode_tables.odin", &sb)
	return len(all)
}
// enumerate_keys fills `out` with every value the `key_bits`-wide field starting
// at bit `key_shift` can take for an encoding described by (b, mask).
//
// Field bits set in `mask` are fixed to the corresponding bits of `b`; field
// bits clear in `mask` are free, and every combination of them is produced.
// Keys are appended in ascending order of the free-bit pattern. `out` is
// cleared first, so the same array can be reused between calls.
enumerate_keys :: proc(b, mask: u32, key_shift: u32, key_bits: u32, out: ^[dynamic]u32) {
	clear(out)

	field := (u32(1) << key_bits) - 1
	base  := ((b & mask) >> key_shift) & field // bits pinned by the mask
	free  := (~mask >> key_shift) & field      // bits the mask leaves open

	// Walk all subsets of `free`: (s - free) & free steps to the next subset,
	// starting from the empty one and finishing at `free` itself. With no free
	// bits the first (empty) subset is also the last.
	subset: u32 = 0
	for {
		append(out, base | subset)
		if subset == free { return }
		subset = (subset - free) & free
	}
}
// emit_form_idx writes the DECODE_FORM_IDX global: one u16 per decode entry,
// holding that entry's form_idx. When every value is zero it emits only a
// zero-initialised declaration instead of a literal.
emit_form_idx :: proc(sb: ^strings.Builder, entries: []Entry) {
	count := len(entries)

	has_nonzero := false
	for entry in entries {
		if entry.form_idx != 0 {
			has_nonzero = true
			break
		}
	}

	if !has_nonzero {
		fmt.sbprintf(sb, "DECODE_FORM_IDX: [%d]u16\n\n", count)
		return
	}

	fmt.sbprintf(sb, "DECODE_FORM_IDX := [%d]u16{{", count)
	for idx in 0..<count {
		// Start a new row every 64 values.
		if idx % 64 == 0 {
			strings.write_string(sb, "\n\t")
		}
		fmt.sbprintf(sb, "%d, ", entries[idx].form_idx)
	}
	strings.write_string(sb, "\n}\n\n")
}
// emit_bucket_list writes the DECODE_BUCKET_LIST global: the flat list of entry
// indices that the index tables point into, 64 values per row.
emit_bucket_list :: proc(sb: ^strings.Builder, items: []u16) {
	PER_ROW :: 64

	fmt.sbprintf(sb, "DECODE_BUCKET_LIST := [%d]u16{{", len(items))
	for pos in 0..<len(items) {
		if pos % PER_ROW == 0 {
			strings.write_string(sb, "\n\t")
		}
		fmt.sbprintf(sb, "% 4d, ", items[pos])
	}
	strings.write_string(sb, "\n}\n\n")
}
// emit_range_table writes one bucket index table named `name` as a sparse
// array literal: only buckets with a non-zero count get an explicit
// `index = {start, count, 0}` element, 16 elements per row.
emit_range_table :: proc(sb: ^strings.Builder, name: string, ranges: []Range) {
	fmt.sbprintf(sb, "%s := [%d]lib.Decode_Index{{", name, len(ranges))

	emitted := 0
	for r, bucket in ranges {
		if r.count == 0 { continue } // empty bucket: leave it to default to zero

		if emitted % 16 == 0 {
			strings.write_string(sb, "\n\t")
		}
		fmt.sbprintf(sb, "0x%04X = {{% 5d, % 4d, 0}}, ", bucket, r.start, r.count)
		emitted += 1
	}

	strings.write_string(sb, "\n}\n\n")
}
// -----------------------------------------------------------------------------
// Shared row + flags formatting
// -----------------------------------------------------------------------------
// write_row appends one table row as an Odin compound literal:
//   { .MNEMONIC, {ops x4}, {enc x4}, 0xBITS, 0xMASK, .FEATURE, .MODE, {flags} },
// Used for both the ENCODE_FORMS and DECODE_ENTRIES rows.
write_row :: proc(sb: ^strings.Builder, mn: lib.Mnemonic, ops: [4]lib.Operand_Type,
                  enc: [4]lib.Operand_Encoding, bits, mask: u32, feature: lib.Feature,
                  mode: lib.Mode, flags: lib.Encoding_Flags) {
	// Emitted column by column; the pieces concatenate to a single line.
	fmt.sbprintf(sb, "\t{{ .%v, ", mn)
	fmt.sbprintf(sb, "{{.%v,.%v,.%v,.%v}}, ", ops[0], ops[1], ops[2], ops[3])
	fmt.sbprintf(sb, "{{.%v,.%v,.%v,.%v}}, ", enc[0], enc[1], enc[2], enc[3])
	fmt.sbprintf(sb, "0x%08X, 0x%08X, ", bits, mask)
	fmt.sbprintf(sb, ".%v, .%v, ", feature, mode)
	fmt.sbprintf(sb, "{{%s}} }},\n", flags_lit(flags))
}
// flags_lit renders the set flags of `f` as the inside of a compound literal,
// e.g. "branch=true, writes_lr=true". Unset flags are omitted, so a value with
// no flags set yields the empty string. The result is allocated with
// context.temp_allocator.
flags_lit :: proc(f: lib.Encoding_Flags) -> string {
	// One slot per flag; `n` counts how many are in use.
	parts: [10]string
	n := 0

	if f.branch      { parts[n] = "branch=true";      n += 1 }
	if f.cond_branch { parts[n] = "cond_branch=true"; n += 1 }
	if f.writes_lr   { parts[n] = "writes_lr=true";   n += 1 }
	if f.sets_cr0    { parts[n] = "sets_cr0=true";    n += 1 }
	if f.sets_cr1    { parts[n] = "sets_cr1=true";    n += 1 }
	if f.abs_branch  { parts[n] = "abs_branch=true";  n += 1 }
	if f.has_oe      { parts[n] = "has_oe=true";      n += 1 }
	if f.prefixed    { parts[n] = "prefixed=true";    n += 1 }
	if f.vle         { parts[n] = "vle=true";         n += 1 }
	if f.vle_short   { parts[n] = "vle_short=true";   n += 1 }

	return strings.join(parts[:n], ", ", context.temp_allocator)
}
// -----------------------------------------------------------------------------
// Stage B writer + the library loader
// -----------------------------------------------------------------------------
// emit_writer generates generated/writer.odin, the Stage B program. Its main
// dumps the raw bytes of every global listed in BLOBS to
// ../../tables/<file>, and exits with status 1 if a write fails.
emit_writer :: proc() {
	sb := strings.builder_make()
	// Release the builder's buffer once the file has been written.
	defer strings.builder_destroy(&sb)

	strings.write_string(&sb, "package rexcode_ppc_generated\n\n")
	strings.write_string(&sb, "// GENERATED by ../gen.odin -- DO NOT EDIT.\n")
	strings.write_string(&sb, "// Stage B: serialize the typed tables above to raw blobs under ../../tables/.\n\n")
	strings.write_string(&sb, "import \"core:os\"\nimport \"core:fmt\"\n\n")
	strings.write_string(&sb, "TABLES :: #directory + \"/../../tables/\"\n\n")

	// Helper that views any object as a byte slice.
	strings.write_string(&sb, "raw :: #force_inline proc \"contextless\" (p: rawptr, n: int) -> []u8 {\n\treturn (cast([^]u8)p)[:n]\n}\n\n")

	// Helper that writes one blob or aborts.
	strings.write_string(&sb, "w :: proc(file: string, data: []u8) {\n")
	strings.write_string(&sb, "\tif err := os.write_entire_file(file, data); err != nil {\n")
	strings.write_string(&sb, "\t\tfmt.eprintfln(\"rexcode tablegen: failed to write %s: %v\", file, err)\n\t\tos.exit(1)\n\t}\n}\n\n")

	// One write per blob, in BLOBS order.
	strings.write_string(&sb, "main :: proc() {\n")
	for b in BLOBS {
		fmt.sbprintfln(&sb, "\tw(TABLES + \"%s\", raw(&%s, size_of(%s)))", b.file, b.global, b.global)
	}
	strings.write_string(&sb, "}\n")

	emit_file(DIR_GEN + "writer.odin", &sb)
}
// LOADER_TYPES is the type-declaration section of the generated library loader
// (../tables.odin); emit_loader writes it verbatim. The text is emitted as-is,
// so edit it as Odin source for the library package, not for this one.
// NOTE(review): DECODE_SUB_BUCKETS is hard-coded to 256 here while this package
// derives its sub-bucket count from SUB_BITS (8) -- keep the two in sync.
LOADER_TYPES :: `// -----------------------------------------------------------------------------
// Subsidiary table types (generated scaffolding)
// -----------------------------------------------------------------------------
// Companion run index: ENCODE_RUNS[mnemonic] -> contiguous run in ENCODE_FORMS.
Encode_Run :: struct {
start: u32,
count: u32,
}
Decode_Entry :: struct #packed {
mnemonic: Mnemonic, // 2
ops: [4]Operand_Type, // 4
enc: [4]Operand_Encoding, // 4
bits: u32, // 4
mask: u32, // 4
feature: Feature, // 1
mode: Mode, // 1
flags: Encoding_Flags, // 2
}
#assert(size_of(Decode_Entry) == 22)
Decode_Index :: struct #packed {
start: u32,
count: u16,
_: u16,
}
#assert(size_of(Decode_Index) == 8)
DECODE_SUB_BUCKETS :: 256 // per primary
`
// LOADER_ACCESSORS is the accessor section of the generated library loader
// (../tables.odin); emit_loader writes it after the #load declarations. The
// constant is assembled from raw-string pieces only so that the emitted comment
// can contain a backtick-quoted `m`.
LOADER_ACCESSORS :: `// -----------------------------------------------------------------------------
// Accessors
// -----------------------------------------------------------------------------
// Per-mnemonic encode forms: the run of ENCODE_FORMS belonging to ` + "`m`" + `.
// Replaces the old ENCODING_TABLE[m] slice; the returned view is into rodata.
@(private, require_results)
encoding_forms :: #force_inline proc "contextless" (m: Mnemonic) -> []Encoding {
r := ENCODE_RUNS[u16(m)]
return ENCODE_FORMS[r.start:][:r.count]
}
`
// emit_loader regenerates the library loader (PATH_LOADER, i.e. ../tables.odin):
// a banner, the LOADER_TYPES section, one column-aligned
//   @(rodata) NAME := #load("tables/<file>", []<type>)
// declaration per BLOBS entry, and the LOADER_ACCESSORS section.
emit_loader :: proc() {
	sb := strings.builder_make()
	// Release the builder's buffer once the file has been written.
	defer strings.builder_destroy(&sb)

	strings.write_string(&sb, "package rexcode_ppc\n\n")
	strings.write_string(&sb, "// =============================================================================\n")
	strings.write_string(&sb, "// GENERATED FILE - DO NOT EDIT\n")
	strings.write_string(&sb, "// =============================================================================\n")
	strings.write_string(&sb, "//\n")
	strings.write_string(&sb, "// Loads the flat binary encode/decode tables into @(rodata). Produced by tablegen:\n")
	strings.write_string(&sb, "//\n")
	strings.write_string(&sb, "// odin run tablegen # Stage A: ENCODING_TABLE -> generated/ + this file\n")
	strings.write_string(&sb, "// odin run tablegen/generated # Stage B: typed Odin literals -> tables/*.bin\n")
	strings.write_string(&sb, "//\n")
	strings.write_string(&sb, "// The .bin blobs are raw, host-endian, packed struct images.\n\n")

	strings.write_string(&sb, LOADER_TYPES)

	strings.write_string(&sb, "\n// -----------------------------------------------------------------------------\n")
	strings.write_string(&sb, "// Loaded tables (rodata, embedded from tables/*.bin at compile time)\n")
	strings.write_string(&sb, "// -----------------------------------------------------------------------------\n\n")

	// Widest global name and file name, used to align the columns below.
	gmax, fmax := 0, 0
	for b in BLOBS {
		gmax = max(gmax, len(b.global))
		fmax = max(fmax, len(b.file))
	}

	for b in BLOBS {
		fmt.sbprintf(&sb, "@(rodata) %s", b.global)
		for _ in 0..<gmax-len(b.global) { strings.write_byte(&sb, ' ') }

		// The padding goes after the comma so the element types line up.
		fmt.sbprintf(&sb, " := #load(\"tables/%s\",", b.file)
		for _ in 0..<fmax-len(b.file) { strings.write_byte(&sb, ' ') }

		fmt.sbprintfln(&sb, " []%s)", b.typ)
	}
	strings.write_string(&sb, "\n")

	strings.write_string(&sb, LOADER_ACCESSORS)

	emit_file(PATH_LOADER, &sb)
}
// Attribution header prepended to every file this generator writes.
GEN_ATTRIB :: "// rexcode · Brendan Punsky (dotbmp@github), original author\n\n"

// emit_file writes GEN_ATTRIB followed by the builder's contents to `path`.
// On failure it reports the error on stderr and exits with status 1.
emit_file :: proc(path: string, sb: ^strings.Builder) {
	contents := strings.concatenate({GEN_ATTRIB, strings.to_string(sb^)})
	// The concatenated copy is only needed for the duration of the write.
	defer delete(contents)

	if err := os.write_entire_file(path, transmute([]u8)contents); err != nil {
		fmt.eprintfln("rexcode tablegen: failed to write %s: %v", path, err)
		os.exit(1)
	}
}