fuzz: add replay-crashes.nu to help find crash repros

2026-04-06 07:38:21 +00:00 · 2026-03-01 20:04:35 -08:00
parent ca31828c93
commit 43ec4ace47
2 changed files with 138 additions and 0 deletions
--- a/test/fuzz-libghostty/AGENTS.md
+++ b/test/fuzz-libghostty/AGENTS.md
@@ -34,3 +34,15 @@ not from a file argument. This affects how you invoke AFL++ tools:

 If you pass `@@` or a filename argument, `afl-showmap`/`afl-cmin`
 will see only ~4 tuples (the C main paths) and produce useless results.
+
+## Replaying crashes
+
+Use `replay-crashes.nu` (Nushell) to list or replay AFL++ crash files.
+
+- **List all crash files:** `nu replay-crashes.nu --list`
+- **JSON output (for structured processing):** `nu replay-crashes.nu --json`
+  Returns an array of objects with `fuzzer`, `file`, `binary`, and `replay_cmd`.
+- **Filter by fuzzer:** `nu replay-crashes.nu --list --fuzzer stream`
+- **Replay all crashes:** `nu replay-crashes.nu`
+  Pipes each crash file into its fuzzer binary via stdin and exits non-zero
+  if any crashes still reproduce.
--- a/test/fuzz-libghostty/replay-crashes.nu
+++ b/test/fuzz-libghostty/replay-crashes.nu
@@ -0,0 +1,126 @@
+#!/usr/bin/env nu
+
+# Replay AFL++ crash files against their corresponding fuzzer binaries.
+#
+# AFL++ stores crashing inputs as raw byte files under:
+#   afl-out/<fuzzer>/default/crashes/
+#
+# Each file's name encodes metadata about how the crash was found, e.g.:
+#   id:000001,sig:06,src:004129,time:690036,execs:1989720,op:havoc,rep:4
+#
+# The fuzzer binaries (fuzz-parser, fuzz-stream) read input from stdin,
+# so each crash file is replayed by piping it into the binary:
+#   open --raw <crash-file> | fuzz-<name>
+#
+# Modes:
+#   (default)   Replay every crash file and report pass/fail.
+#   --list      Print crash file paths, one per line (no replay).
+#   --json      Emit structured JSON with fuzzer name, file path,
+#               binary path, and a ready-to-run replay command.
+#               Useful for LLM agents that need to enumerate and
+#               selectively replay specific crashes.
+#   --fuzzer    Restrict to a single fuzzer target (e.g. "stream").
+
+def main [
+    afl_out: string = "afl-out"  # Path to the AFL++ output directory
+    --list (-l)                   # List crash files without replaying
+    --json (-j)                   # Output as JSON (implies --list)
+    --fuzzer (-f): string         # Only process this fuzzer (e.g. "stream" or "parser")
+] {
+    # Directory where `zig build` places the instrumented fuzzer binaries.
+    let bin_dir = "zig-out/bin"
+
+    # All known fuzzer targets. Each one has a corresponding binary
+    # named fuzz-<name> and AFL++ output under afl-out/<name>/.
+    let all_fuzzers = [parser stream]
+
+    # If --fuzzer is given, restrict to just that target; otherwise run all.
+    let fuzzers = if $fuzzer != null { [$fuzzer] } else { $all_fuzzers }
+
+    # --json implies --list (we never replay in JSON mode).
+    let list_only = $list or $json
+
+    # Accumulator for --list/--json output records.
+    mut results = []
+
+    # Counter for crash files that still reproduce (non-zero exit from binary).
+    mut failures = 0
+
+    for fuzz in $fuzzers {
+        # AFL++ writes crash inputs to <out>/<fuzzer>/default/crashes/.
+        let crashes_dir = $"($afl_out)/($fuzz)/default/crashes"
+
+        # The replay binary for this fuzzer target.
+        let binary = $"($bin_dir)/fuzz-($fuzz)"
+
+        # Skip this fuzzer if no crashes directory exists (fuzzer may not
+        # have been run, or it found no crashes).
+        if not ($crashes_dir | path exists) {
+            continue
+        }
+
+        # Gather all crash files, filtering out:
+        #   - Directories (AFL++ may create subdirs like .synced/)
+        #   - README.txt (AFL++ places a README in the crashes dir)
+        let crash_files = (ls $crashes_dir
+            | where type == file
+            | where { |f| ($f.name | path basename) != "README.txt" }
+            | get name)
+
+        # In list-only mode, collect metadata about each crash file
+        # without actually replaying it. This lets an LLM enumerate
+        # crashes and decide which ones to investigate.
+        if $list_only {
+            for crash_file in $crash_files {
+                $results = ($results | append {
+                    fuzzer: $fuzz,
+                    file: $crash_file,
+                    binary: $binary,
+                    replay_cmd: $"open --raw ($crash_file) | ($binary)",
+                })
+            }
+            continue
+        }
+
+        # In replay mode, we need the binary to exist.
+        if not ($binary | path exists) {
+            print -e $"WARNING: binary ($binary) not found, skipping ($fuzz)"
+            continue
+        }
+
+        # Replay each crash file by piping its raw bytes into the fuzzer
+        # binary via stdin. The binary exits non-zero if the crash
+        # reproduces (e.g. SIGABRT / signal 6).
+        for crash_file in $crash_files {
+            print $"==> ($crash_file)"
+            let result = do -i { open --raw $crash_file | run-external $binary }
+            if $result != null {
+                $failures += 1
+            }
+        }
+    }
+
+    # Emit collected results in the requested format.
+    if $list_only {
+        if $json {
+            # JSON output: array of objects, each with fuzzer, file,
+            # binary, and a replay_cmd string an LLM can execute directly.
+            print ($results | to json)
+        } else {
+            # Plain text: one file path per line.
+            for r in $results {
+                print $r.file
+            }
+        }
+        return
+    }
+
+    # Summary: exit 1 if any crashes still reproduce so CI / scripts
+    # can detect regressions.
+    if $failures > 0 {
+        print $"FAILED: ($failures) crash\(es\) still reproduce"
+        exit 1
+    }
+
+    print "All crash files replayed."
+}