bench: add --mode=gen-osc to generate synthetic OSC sequences (#7359)

cc @qwerasd205 

This commit adds a few new mode flags to the `bench-stream` program to
generate synthetic OSC sequences. The new modes are `gen-osc`,
`gen-osc-valid`, and `gen-osc-invalid`. The `gen-osc` mode generates
equal parts valid and invalid OSC sequences, while the suffixed variants
are for generating only valid or invalid sequences, respectively.

This commit also fixes our build system to actually be able to build the
benchmarks. It turns out we were just rebuilding the main Ghostty binary
for `-Demit-bench`. And, our benchmarks didn't run under Zig 0.14, which
is now fixed.

An important new design I'm working towards in this commit is to split
out synthetic data generation to a dedicated package in
`src/bench/synth` although I'm tempted to move it to `src/synth` since
it may be useful outside of benchmarks.

The synth package is a work-in-progress, but it contains a hint of
what's to come. I ultimately want to be able to generate all kinds of
synthetic data with a lot of knobs to control dimensionality (e.g. in
the case of OSC sequences: valid/invalid, length, operation types,
etc.).
This commit is contained in:
Mitchell Hashimoto
2025-05-15 20:19:37 -07:00
committed by GitHub
10 changed files with 255 additions and 9 deletions

View File

@@ -68,7 +68,7 @@ pub fn main() !void {
var args: Args = .{};
defer args.deinit();
{
var iter = try std.process.argsWithAllocator(alloc);
var iter = try cli.args.argsIterator(alloc);
defer iter.deinit();
try cli.args.parse(Args, alloc, &args, &iter);
}

View File

@@ -60,7 +60,7 @@ pub fn main() !void {
var args: Args = .{};
defer args.deinit();
{
var iter = try std.process.argsWithAllocator(alloc);
var iter = try cli.args.argsIterator(alloc);
defer iter.deinit();
try cli.args.parse(Args, alloc, &args, &iter);
}

View File

@@ -45,7 +45,7 @@ pub fn main() !void {
var args: Args = .{};
defer args.deinit();
{
var iter = try std.process.argsWithAllocator(alloc);
var iter = try cli.args.argsIterator(alloc);
defer iter.deinit();
try cli.args.parse(Args, alloc, &args, &iter);
}

View File

@@ -27,7 +27,7 @@ pub fn main() !void {
var args: Args = args: {
var args: Args = .{};
errdefer args.deinit();
var iter = try std.process.argsWithAllocator(alloc);
var iter = try cli.args.argsIterator(alloc);
defer iter.deinit();
try cli.args.parse(Args, alloc, &args, &iter);
break :args args;

View File

@@ -15,6 +15,7 @@ const ArenaAllocator = std.heap.ArenaAllocator;
const ziglyph = @import("ziglyph");
const cli = @import("../cli.zig");
const terminal = @import("../terminal/main.zig");
const synth = @import("synth/main.zig");
const Args = struct {
mode: Mode = .noop,
@@ -70,6 +71,14 @@ const Mode = enum {
// Generate an infinite stream of arbitrary random bytes.
@"gen-rand",
// Generate an infinite stream of OSC requests. These will be mixed
// with valid and invalid OSC requests by default, but the
// `-valid` and `-invalid`-suffixed variants can be used to get only
// a specific type of OSC request.
@"gen-osc",
@"gen-osc-valid",
@"gen-osc-invalid",
};
pub const std_options: std.Options = .{
@@ -84,7 +93,7 @@ pub fn main() !void {
var args: Args = .{};
defer args.deinit();
{
var iter = try std.process.argsWithAllocator(alloc);
var iter = try cli.args.argsIterator(alloc);
defer iter.deinit();
try cli.args.parse(Args, alloc, &args, &iter);
}
@@ -100,6 +109,9 @@ pub fn main() !void {
.@"gen-ascii" => try genAscii(writer, seed),
.@"gen-utf8" => try genUtf8(writer, seed),
.@"gen-rand" => try genRand(writer, seed),
.@"gen-osc" => try genOsc(writer, seed, 0.5),
.@"gen-osc-valid" => try genOsc(writer, seed, 1.0),
.@"gen-osc-invalid" => try genOsc(writer, seed, 0.0),
.noop => try benchNoop(reader, buf),
// Handle the ones that depend on terminal state next
@@ -142,7 +154,7 @@ fn genAscii(writer: anytype, seed: u64) !void {
/// Generates an infinite stream of bytes from the given alphabet.
fn genData(writer: anytype, alphabet: []const u8, seed: u64) !void {
var prng = std.rand.DefaultPrng.init(seed);
var prng = std.Random.DefaultPrng.init(seed);
const rnd = prng.random();
var buf: [1024]u8 = undefined;
while (true) {
@@ -159,7 +171,7 @@ fn genData(writer: anytype, alphabet: []const u8, seed: u64) !void {
}
fn genUtf8(writer: anytype, seed: u64) !void {
var prng = std.rand.DefaultPrng.init(seed);
var prng = std.Random.DefaultPrng.init(seed);
const rnd = prng.random();
var buf: [1024]u8 = undefined;
while (true) {
@@ -180,8 +192,22 @@ fn genUtf8(writer: anytype, seed: u64) !void {
}
}
/// Writes an endless stream of synthetic OSC sequences to `writer`.
///
/// `seed` seeds the PRNG that drives the generator and `p_valid` is handed
/// to the generator as the probability that any given sequence is valid.
/// Returns cleanly once the writer reports `error.BrokenPipe`; every other
/// error is propagated to the caller.
fn genOsc(writer: anytype, seed: u64, p_valid: f64) !void {
    var prng = std.Random.DefaultPrng.init(seed);
    const generator: synth.OSC = .{
        .rand = prng.random(),
        .p_valid = p_valid,
    };

    // Each sequence is rendered into this scratch space before being
    // flushed to the writer.
    var scratch: [1024]u8 = undefined;
    while (true) {
        const bytes = try generator.next(&scratch);
        writer.writeAll(bytes) catch |err| {
            // stdout closed
            if (err == error.BrokenPipe) return;
            return err;
        };
    }
}
fn genRand(writer: anytype, seed: u64) !void {
var prng = std.rand.DefaultPrng.init(seed);
var prng = std.Random.DefaultPrng.init(seed);
const rnd = prng.random();
var buf: [1024]u8 = undefined;
while (true) {

15
src/bench/synth/main.zig Normal file
View File

@@ -0,0 +1,15 @@
//! Package synth contains functions for generating synthetic data for
//! the purpose of benchmarking, primarily. This can also probably be used
//! for testing and fuzzing (probably generating a corpus rather than
//! directly fuzzing) and more.
//!
//! The synthetic data generators in this package are usually not performant
//! enough to be streamed in real time. They should instead be used to
//! generate a large amount of data in a single go and then streamed
//! from there.
/// Synthetic OSC request generator, re-exported from `osc.zig`.
pub const OSC = @import("osc.zig").Generator;

test {
    // Reference every declaration so that nested tests are compiled and run.
    const testing = @import("std").testing;
    testing.refAllDecls(@This());
}

197
src/bench/synth/osc.zig Normal file
View File

@@ -0,0 +1,197 @@
const std = @import("std");
const assert = std.debug.assert;
/// Synthetic OSC request generator.
///
/// I tried to balance generality and practicality. I implemented mainly
/// all I need at the time of writing this, but I think this can be iterated
/// over time to be a general purpose OSC generator with a lot of
/// configurability. I limited the configurability to what I need but still
/// tried to lay out the code in a way that it can be extended easily.
pub const Generator = struct {
    /// Random number generator.
    rand: std.Random,

    /// Probability of a valid OSC sequence being generated. `0.0` means
    /// every request takes the invalid path and `1.0` means every request
    /// takes the valid path.
    p_valid: f64 = 1.0,

    pub const Error = error{NoSpaceLeft};

    /// We use a FBS as a direct parameter below in non-pub functions,
    /// but we should probably just switch to `[]u8`.
    const FBS = std.io.FixedBufferStream([]u8);

    /// Get the next OSC request in bytes. The generated OSC request will
    /// have the prefix `ESC ]` and the terminator `BEL` (0x07).
    ///
    /// This will generate both valid and invalid OSC requests (based on
    /// the `p_valid` probability value). Invalid requests still have the
    /// prefix and terminator, but the content in between is not a valid
    /// OSC request.
    ///
    /// The buffer must be at least 3 bytes long to accommodate the
    /// prefix and terminator. The returned slice points into `buf`.
    ///
    /// Returns `error.NoSpaceLeft` when the fixed parts of the chosen
    /// request (prefix, keywords, terminator) do not fit in `buf`. Random
    /// payloads are shortened to fit and never cause this error by
    /// themselves.
    pub fn next(self: *const Generator, buf: []u8) Error![]const u8 {
        assert(buf.len >= 3);
        var fbs: FBS = std.io.fixedBufferStream(buf);
        const writer = fbs.writer();

        // Start OSC (ESC ])
        try writer.writeAll("\x1b]");

        // Determine if we are generating a valid or invalid OSC request.
        switch (self.chooseValidity()) {
            .valid => try self.nextValid(&fbs),
            .invalid => try self.nextInvalid(&fbs),
        }

        // Terminate OSC
        try writer.writeAll("\x07");
        return fbs.getWritten();
    }

    /// Write the body of a valid request of a randomly chosen kind.
    fn nextValid(self: *const Generator, fbs: *FBS) Error!void {
        try self.nextValidExact(fbs, self.rand.enumValue(ValidKind));
    }

    /// Write the body (no prefix, no terminator) of a valid request of
    /// exactly kind `k`.
    fn nextValidExact(self: *const Generator, fbs: *FBS, k: ValidKind) Error!void {
        switch (k) {
            .change_window_title => {
                try fbs.writer().writeAll("0;"); // Set window title
                try self.randomBytes(fbs, 1, fbs.buffer.len);
            },

            .prompt_start => {
                try fbs.writer().writeAll("133;A"); // Start prompt

                // aid
                if (self.rand.boolean()) {
                    try fbs.writer().writeAll(";aid=");
                    try self.randomBytes(fbs, 1, 16);
                }

                // redraw
                if (self.rand.boolean()) {
                    try fbs.writer().writeAll(";redraw=");
                    if (self.rand.boolean()) {
                        try fbs.writer().writeAll("1");
                    } else {
                        try fbs.writer().writeAll("0");
                    }
                }
            },

            .prompt_end => try fbs.writer().writeAll("133;B"), // End prompt
        }
    }

    /// Write the body of an invalid request of a randomly chosen kind.
    fn nextInvalid(self: *const Generator, fbs: *FBS) Error!void {
        switch (self.rand.enumValue(InvalidKind)) {
            .random => try self.randomBytes(fbs, 1, fbs.buffer.len),
            .good_prefix => {
                try fbs.writer().writeAll("133;");
                try self.randomBytes(fbs, 2, fbs.buffer.len);
            },
        }
    }

    /// Generate a random string of bytes whose length is drawn from
    /// `min_len..=max_len` and then clamped to the space left in the
    /// buffer minus one byte (reserved for the terminator). Because of
    /// the clamp, fewer than `min_len` bytes (possibly zero) are written
    /// when the buffer is nearly full.
    ///
    /// This will avoid the terminator characters (0x1B and 0x07) and
    /// replace them by incrementing them by one.
    fn randomBytes(
        self: *const Generator,
        fbs: *FBS,
        min_len: usize,
        max_len: usize,
    ) Error!void {
        // Saturating subtraction: a fixed prefix may already have filled
        // the buffer completely (pos == buffer.len). A plain `-` would
        // underflow there; saturating to zero writes nothing and lets
        // the caller's terminator write report NoSpaceLeft instead.
        const available = fbs.buffer.len -| fbs.pos -| 1;
        const len = @min(
            self.rand.intRangeAtMostBiased(usize, min_len, max_len),
            available,
        );

        var rem: usize = len;
        var buf: [1024]u8 = undefined;
        while (rem > 0) {
            // Always fill the whole scratch buffer so that the amount of
            // randomness consumed per chunk does not depend on `rem`.
            self.rand.bytes(&buf);
            std.mem.replaceScalar(u8, &buf, 0x1B, 0x1C);
            std.mem.replaceScalar(u8, &buf, 0x07, 0x08);

            const n = @min(rem, buf.len);
            try fbs.writer().writeAll(buf[0..n]);
            rem -= n;
        }
    }

    /// Choose whether to generate a valid or invalid OSC request based
    /// on the validity probability.
    fn chooseValidity(self: *const Generator) Validity {
        // `float` yields a value in [0, 1). Using `>=` (not `>`) makes
        // `p_valid == 0.0` always invalid, even when the draw is exactly
        // 0.0, while `p_valid == 1.0` still always yields valid.
        return if (self.rand.float(f64) >= self.p_valid)
            .invalid
        else
            .valid;
    }

    const Validity = enum { valid, invalid };

    const ValidKind = enum {
        change_window_title,
        prompt_start,
        prompt_end,
    };

    const InvalidKind = enum {
        /// Literally random bytes. Might even be valid, but probably not.
        random,

        /// A good prefix, but ultimately invalid format.
        good_prefix,
    };
};
/// A fixed seed we can use for our tests to avoid flakes.
const test_seed = 0xC0FFEEEEEEEEEEEE;
// Smoke test: with the default settings, fifty consecutive requests must be
// generated into a 4 KiB buffer without returning an error.
test "OSC generator" {
    var prng = std.Random.DefaultPrng.init(test_seed);
    const gen: Generator = .{ .rand = prng.random() };

    var buf: [4096]u8 = undefined;
    var i: usize = 0;
    while (i < 50) : (i += 1) {
        _ = try gen.next(&buf);
    }
}
// With `p_valid = 1.0`, every generated request must be accepted by the
// terminal's OSC parser.
test "OSC generator valid" {
    const testing = std.testing;
    const terminal = @import("../../terminal/main.zig");

    var prng = std.Random.DefaultPrng.init(test_seed);
    const gen: Generator = .{
        .rand = prng.random(),
        .p_valid = 1.0,
    };

    var buf: [256]u8 = undefined;
    var round: usize = 0;
    while (round < 50) : (round += 1) {
        const seq = try gen.next(&buf);

        // Feed only the payload: skip the two-byte `ESC ]` prefix and the
        // trailing BEL terminator.
        const payload = seq[2 .. seq.len - 1];
        var parser: terminal.osc.Parser = .{};
        for (payload) |byte| parser.next(byte);

        const command = parser.end(null);
        try testing.expect(command != null);
    }
}
// With `p_valid = 0.0`, every request generated from the fixed test seed
// must be rejected by the terminal's OSC parser.
test "OSC generator invalid" {
    const testing = std.testing;
    const terminal = @import("../../terminal/main.zig");

    var prng = std.Random.DefaultPrng.init(test_seed);
    const gen: Generator = .{
        .rand = prng.random(),
        .p_valid = 0.0,
    };

    var buf: [256]u8 = undefined;
    var round: usize = 0;
    while (round < 50) : (round += 1) {
        const seq = try gen.next(&buf);

        // Feed only the payload: skip the two-byte `ESC ]` prefix and the
        // trailing BEL terminator.
        const payload = seq[2 .. seq.len - 1];
        var parser: terminal.osc.Parser = .{};
        for (payload) |byte| parser.next(byte);

        const command = parser.end(null);
        try testing.expect(command == null);
    }
}

View File

@@ -60,6 +60,9 @@ pub fn changeEntrypoint(
var result = self.*;
result.config = config;
result.options = b.addOptions();
try config.addOptions(result.options);
return result;
}

View File

@@ -182,6 +182,7 @@ test {
_ = @import("surface_mouse.zig");
// Libraries
_ = @import("bench/synth/main.zig");
_ = @import("crash/main.zig");
_ = @import("datastruct/main.zig");
_ = @import("inspector/main.zig");

View File

@@ -6,6 +6,7 @@
const osc = @This();
const std = @import("std");
const builtin = @import("builtin");
const mem = std.mem;
const assert = std.debug.assert;
const Allocator = mem.Allocator;
@@ -1332,7 +1333,10 @@ pub const Parser = struct {
/// the response terminator.
pub fn end(self: *Parser, terminator_ch: ?u8) ?Command {
if (!self.complete) {
log.warn("invalid OSC command: {s}", .{self.buf[0..self.buf_idx]});
if (comptime !builtin.is_test) log.warn(
"invalid OSC command: {s}",
.{self.buf[0..self.buf_idx]},
);
return null;
}