mirror of
https://github.com/ghostty-org/ghostty.git
synced 2025-09-05 19:08:17 +00:00
synthetic package
This introduces a new package, `src/synthetic`, for generating synthetic data. It is currently used primarily for benchmarking, but other use cases may emerge. The synthetic package exports a runtime-dispatched type, `Generator`, that can generate data of various types. To start, we have bytes, UTF-8, and OSC generators. The goal of each generator is to expose knobs to tune the probabilities of various outcomes. For example, the UTF-8 generator has a knob to tune the probability of generating 1-, 2-, 3-, or 4-byte UTF-8 sequences. Ultimately, the goal is to collect probability data empirically and then use it in benchmarks, so that we can optimize various parts of the codebase against real-world data shape distributions.
This commit is contained in:
@@ -12,10 +12,9 @@ const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const ArenaAllocator = std.heap.ArenaAllocator;
|
||||
const ziglyph = @import("ziglyph");
|
||||
const cli = @import("../cli.zig");
|
||||
const terminal = @import("../terminal/main.zig");
|
||||
const synth = @import("synth/main.zig");
|
||||
const synthetic = @import("../synthetic/main.zig");
|
||||
|
||||
const Args = struct {
|
||||
mode: Mode = .noop,
|
||||
@@ -102,16 +101,57 @@ pub fn main() !void {
|
||||
const writer = std.io.getStdOut().writer();
|
||||
const buf = try alloc.alloc(u8, args.@"buffer-size");
|
||||
|
||||
// Build our RNG
|
||||
const seed: u64 = if (args.seed >= 0) @bitCast(args.seed) else @truncate(@as(u128, @bitCast(std.time.nanoTimestamp())));
|
||||
var prng = std.Random.DefaultPrng.init(seed);
|
||||
const rand = prng.random();
|
||||
|
||||
// Handle the modes that do not depend on terminal state first.
|
||||
switch (args.mode) {
|
||||
.@"gen-ascii" => try genAscii(writer, seed),
|
||||
.@"gen-utf8" => try genUtf8(writer, seed),
|
||||
.@"gen-rand" => try genRand(writer, seed),
|
||||
.@"gen-osc" => try genOsc(writer, seed, 0.5),
|
||||
.@"gen-osc-valid" => try genOsc(writer, seed, 1.0),
|
||||
.@"gen-osc-invalid" => try genOsc(writer, seed, 0.0),
|
||||
.@"gen-ascii" => {
|
||||
var gen: synthetic.Bytes = .{
|
||||
.rand = rand,
|
||||
.alphabet = synthetic.Bytes.Alphabet.ascii,
|
||||
};
|
||||
try generate(writer, gen.generator());
|
||||
},
|
||||
|
||||
.@"gen-utf8" => {
|
||||
var gen: synthetic.Utf8 = .{
|
||||
.rand = rand,
|
||||
};
|
||||
try generate(writer, gen.generator());
|
||||
},
|
||||
|
||||
.@"gen-rand" => {
|
||||
var gen: synthetic.Bytes = .{ .rand = rand };
|
||||
try generate(writer, gen.generator());
|
||||
},
|
||||
|
||||
.@"gen-osc" => {
|
||||
var gen: synthetic.Osc = .{
|
||||
.rand = rand,
|
||||
.p_valid = 0.5,
|
||||
};
|
||||
try generate(writer, gen.generator());
|
||||
},
|
||||
|
||||
.@"gen-osc-valid" => {
|
||||
var gen: synthetic.Osc = .{
|
||||
.rand = rand,
|
||||
.p_valid = 1.0,
|
||||
};
|
||||
try generate(writer, gen.generator());
|
||||
},
|
||||
|
||||
.@"gen-osc-invalid" => {
|
||||
var gen: synthetic.Osc = .{
|
||||
.rand = rand,
|
||||
.p_valid = 0.0,
|
||||
};
|
||||
try generate(writer, gen.generator());
|
||||
},
|
||||
|
||||
.noop => try benchNoop(reader, buf),
|
||||
|
||||
// Handle the ones that depend on terminal state next
|
||||
@@ -145,75 +185,14 @@ pub fn main() !void {
|
||||
}
|
||||
}
|
||||
|
||||
/// Generates an infinite stream of random printable ASCII characters.
|
||||
/// This has no control characters in it at all.
|
||||
fn genAscii(writer: anytype, seed: u64) !void {
|
||||
const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+-=[]{}|;':\\\",./<>?`~";
|
||||
try genData(writer, alphabet, seed);
|
||||
}
|
||||
|
||||
/// Generates an infinite stream of bytes from the given alphabet.
|
||||
fn genData(writer: anytype, alphabet: []const u8, seed: u64) !void {
|
||||
var prng = std.Random.DefaultPrng.init(seed);
|
||||
const rnd = prng.random();
|
||||
fn generate(
|
||||
writer: anytype,
|
||||
gen: synthetic.Generator,
|
||||
) !void {
|
||||
var buf: [1024]u8 = undefined;
|
||||
while (true) {
|
||||
for (&buf) |*c| {
|
||||
const idx = rnd.uintLessThanBiased(usize, alphabet.len);
|
||||
c.* = alphabet[idx];
|
||||
}
|
||||
|
||||
writer.writeAll(&buf) catch |err| switch (err) {
|
||||
error.BrokenPipe => return, // stdout closed
|
||||
else => return err,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn genUtf8(writer: anytype, seed: u64) !void {
|
||||
var prng = std.Random.DefaultPrng.init(seed);
|
||||
const rnd = prng.random();
|
||||
var buf: [1024]u8 = undefined;
|
||||
while (true) {
|
||||
var i: usize = 0;
|
||||
while (i <= buf.len - 4) {
|
||||
const cp: u18 = while (true) {
|
||||
const cp = rnd.int(u18);
|
||||
if (ziglyph.isPrint(cp)) break cp;
|
||||
};
|
||||
|
||||
i += try std.unicode.utf8Encode(cp, buf[i..]);
|
||||
}
|
||||
|
||||
writer.writeAll(buf[0..i]) catch |err| switch (err) {
|
||||
error.BrokenPipe => return, // stdout closed
|
||||
else => return err,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn genOsc(writer: anytype, seed: u64, p_valid: f64) !void {
|
||||
var prng = std.Random.DefaultPrng.init(seed);
|
||||
const gen: synth.OSC = .{ .rand = prng.random(), .p_valid = p_valid };
|
||||
|
||||
var buf: [1024]u8 = undefined;
|
||||
while (true) {
|
||||
const seq = try gen.next(&buf);
|
||||
writer.writeAll(seq) catch |err| switch (err) {
|
||||
error.BrokenPipe => return, // stdout closed
|
||||
else => return err,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
fn genRand(writer: anytype, seed: u64) !void {
|
||||
var prng = std.Random.DefaultPrng.init(seed);
|
||||
const rnd = prng.random();
|
||||
var buf: [1024]u8 = undefined;
|
||||
while (true) {
|
||||
rnd.bytes(&buf);
|
||||
|
||||
writer.writeAll(&buf) catch |err| switch (err) {
|
||||
const data = try gen.next(&buf);
|
||||
writer.writeAll(data) catch |err| switch (err) {
|
||||
error.BrokenPipe => return, // stdout closed
|
||||
else => return err,
|
||||
};
|
||||
|
@@ -1,15 +0,0 @@
|
||||
//! Package synth contains functions for generating synthetic data for
|
||||
//! the purpose of benchmarking, primarily. This can also probably be used
|
||||
//! for testing and fuzzing (probably generating a corpus rather than
|
||||
//! directly fuzzing) and more.
|
||||
//!
|
||||
//! The synthetic data generators in this package are usually not performant
|
||||
//! enough to be streamed in real time. They should instead be used to
|
||||
//! generate a large amount of data in a single go and then streamed
|
||||
//! from there.
|
||||
|
||||
pub const OSC = @import("osc.zig").Generator;
|
||||
|
||||
test {
|
||||
@import("std").testing.refAllDecls(@This());
|
||||
}
|
@@ -1,197 +0,0 @@
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
|
||||
/// Synthetic OSC request generator.
|
||||
///
|
||||
/// I tried to balance generality and practicality. I implemented mainly
|
||||
/// all I need at the time of writing this, but I think this can be iterated
|
||||
/// over time to be a general purpose OSC generator with a lot of
|
||||
/// configurability. I limited the configurability to what I need but still
|
||||
/// tried to lay out the code in a way that it can be extended easily.
|
||||
pub const Generator = struct {
|
||||
/// Random number generator.
|
||||
rand: std.Random,
|
||||
|
||||
/// Probability of a valid OSC sequence being generated.
|
||||
p_valid: f64 = 1.0,
|
||||
|
||||
pub const Error = error{NoSpaceLeft};
|
||||
|
||||
/// We use a FBS as a direct parameter below in non-pub functions,
|
||||
/// but we should probably just switch to `[]u8`.
|
||||
const FBS = std.io.FixedBufferStream([]u8);
|
||||
|
||||
/// Get the next OSC request in bytes. The generated OSC request will
|
||||
/// have the prefix `ESC ]` and the terminator `BEL` (0x07).
|
||||
///
|
||||
/// This will generate both valid and invalid OSC requests (based on
|
||||
/// the `p_valid` probability value). Invalid requests still have the
|
||||
/// prefix and terminator, but the content in between is not a valid
|
||||
/// OSC request.
|
||||
///
|
||||
/// The buffer must be at least 3 bytes long to accommodate the
|
||||
/// prefix and terminator.
|
||||
pub fn next(self: *const Generator, buf: []u8) Error![]const u8 {
|
||||
assert(buf.len >= 3);
|
||||
var fbs: FBS = std.io.fixedBufferStream(buf);
|
||||
const writer = fbs.writer();
|
||||
|
||||
// Start OSC (ESC ])
|
||||
try writer.writeAll("\x1b]");
|
||||
|
||||
// Determine if we are generating a valid or invalid OSC request.
|
||||
switch (self.chooseValidity()) {
|
||||
.valid => try self.nextValid(&fbs),
|
||||
.invalid => try self.nextInvalid(&fbs),
|
||||
}
|
||||
|
||||
// Terminate OSC
|
||||
try writer.writeAll("\x07");
|
||||
return fbs.getWritten();
|
||||
}
|
||||
|
||||
fn nextValid(self: *const Generator, fbs: *FBS) Error!void {
|
||||
try self.nextValidExact(fbs, self.rand.enumValue(ValidKind));
|
||||
}
|
||||
|
||||
fn nextValidExact(self: *const Generator, fbs: *FBS, k: ValidKind) Error!void {
|
||||
switch (k) {
|
||||
.change_window_title => {
|
||||
try fbs.writer().writeAll("0;"); // Set window title
|
||||
try self.randomBytes(fbs, 1, fbs.buffer.len);
|
||||
},
|
||||
|
||||
.prompt_start => {
|
||||
try fbs.writer().writeAll("133;A"); // Start prompt
|
||||
|
||||
// aid
|
||||
if (self.rand.boolean()) {
|
||||
try fbs.writer().writeAll(";aid=");
|
||||
try self.randomBytes(fbs, 1, 16);
|
||||
}
|
||||
|
||||
// redraw
|
||||
if (self.rand.boolean()) {
|
||||
try fbs.writer().writeAll(";redraw=");
|
||||
if (self.rand.boolean()) {
|
||||
try fbs.writer().writeAll("1");
|
||||
} else {
|
||||
try fbs.writer().writeAll("0");
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
.prompt_end => try fbs.writer().writeAll("133;B"), // End prompt
|
||||
}
|
||||
}
|
||||
|
||||
fn nextInvalid(self: *const Generator, fbs: *FBS) Error!void {
|
||||
switch (self.rand.enumValue(InvalidKind)) {
|
||||
.random => try self.randomBytes(fbs, 1, fbs.buffer.len),
|
||||
.good_prefix => {
|
||||
try fbs.writer().writeAll("133;");
|
||||
try self.randomBytes(fbs, 2, fbs.buffer.len);
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a random string of bytes up to `max_len` bytes or
|
||||
/// until we run out of space in the buffer, whichever is
|
||||
/// smaller.
|
||||
///
|
||||
/// This will avoid the terminator characters (0x1B and 0x07) and
|
||||
/// replace them by incrementing them by one.
|
||||
fn randomBytes(
|
||||
self: *const Generator,
|
||||
fbs: *FBS,
|
||||
min_len: usize,
|
||||
max_len: usize,
|
||||
) Error!void {
|
||||
const len = @min(
|
||||
self.rand.intRangeAtMostBiased(usize, min_len, max_len),
|
||||
fbs.buffer.len - fbs.pos - 1, // leave space for terminator
|
||||
);
|
||||
var rem: usize = len;
|
||||
var buf: [1024]u8 = undefined;
|
||||
while (rem > 0) {
|
||||
self.rand.bytes(&buf);
|
||||
std.mem.replaceScalar(u8, &buf, 0x1B, 0x1C);
|
||||
std.mem.replaceScalar(u8, &buf, 0x07, 0x08);
|
||||
|
||||
const n = @min(rem, buf.len);
|
||||
try fbs.writer().writeAll(buf[0..n]);
|
||||
rem -= n;
|
||||
}
|
||||
}
|
||||
|
||||
/// Choose whether to generate a valid or invalid OSC request based
|
||||
/// on the validity probability.
|
||||
fn chooseValidity(self: *const Generator) Validity {
|
||||
return if (self.rand.float(f64) > self.p_valid)
|
||||
.invalid
|
||||
else
|
||||
.valid;
|
||||
}
|
||||
|
||||
const Validity = enum { valid, invalid };
|
||||
|
||||
const ValidKind = enum {
|
||||
change_window_title,
|
||||
prompt_start,
|
||||
prompt_end,
|
||||
};
|
||||
|
||||
const InvalidKind = enum {
|
||||
/// Literally random bytes. Might even be valid, but probably not.
|
||||
random,
|
||||
|
||||
/// A good prefix, but ultimately invalid format.
|
||||
good_prefix,
|
||||
};
|
||||
};
|
||||
|
||||
/// A fixed seed we can use for our tests to avoid flakes.
|
||||
const test_seed = 0xC0FFEEEEEEEEEEEE;
|
||||
|
||||
test "OSC generator" {
|
||||
var prng = std.Random.DefaultPrng.init(test_seed);
|
||||
var buf: [4096]u8 = undefined;
|
||||
const gen: Generator = .{ .rand = prng.random() };
|
||||
for (0..50) |_| _ = try gen.next(&buf);
|
||||
}
|
||||
|
||||
test "OSC generator valid" {
|
||||
const testing = std.testing;
|
||||
const terminal = @import("../../terminal/main.zig");
|
||||
|
||||
var prng = std.Random.DefaultPrng.init(test_seed);
|
||||
var buf: [256]u8 = undefined;
|
||||
const gen: Generator = .{
|
||||
.rand = prng.random(),
|
||||
.p_valid = 1.0,
|
||||
};
|
||||
for (0..50) |_| {
|
||||
const seq = try gen.next(&buf);
|
||||
var parser: terminal.osc.Parser = .{};
|
||||
for (seq[2 .. seq.len - 1]) |c| parser.next(c);
|
||||
try testing.expect(parser.end(null) != null);
|
||||
}
|
||||
}
|
||||
|
||||
test "OSC generator invalid" {
|
||||
const testing = std.testing;
|
||||
const terminal = @import("../../terminal/main.zig");
|
||||
|
||||
var prng = std.Random.DefaultPrng.init(test_seed);
|
||||
var buf: [256]u8 = undefined;
|
||||
const gen: Generator = .{
|
||||
.rand = prng.random(),
|
||||
.p_valid = 0.0,
|
||||
};
|
||||
for (0..50) |_| {
|
||||
const seq = try gen.next(&buf);
|
||||
var parser: terminal.osc.Parser = .{};
|
||||
for (seq[2 .. seq.len - 1]) |c| parser.next(c);
|
||||
try testing.expect(parser.end(null) == null);
|
||||
}
|
||||
}
|
@@ -182,12 +182,12 @@ test {
|
||||
_ = @import("surface_mouse.zig");
|
||||
|
||||
// Libraries
|
||||
_ = @import("bench/synth/main.zig");
|
||||
_ = @import("crash/main.zig");
|
||||
_ = @import("datastruct/main.zig");
|
||||
_ = @import("inspector/main.zig");
|
||||
_ = @import("terminal/main.zig");
|
||||
_ = @import("terminfo/main.zig");
|
||||
_ = @import("simd/main.zig");
|
||||
_ = @import("synthetic/main.zig");
|
||||
_ = @import("unicode/main.zig");
|
||||
}
|
||||
|
53
src/synthetic/Bytes.zig
Normal file
53
src/synthetic/Bytes.zig
Normal file
@@ -0,0 +1,53 @@
|
||||
/// Generates bytes.
|
||||
const Bytes = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const Generator = @import("Generator.zig");
|
||||
|
||||
/// Random number generator.
|
||||
rand: std.Random,
|
||||
|
||||
/// The minimum and maximum length of the generated bytes. The maximum
|
||||
/// length will be capped to the length of the buffer passed in if the
|
||||
/// buffer length is smaller.
|
||||
min_len: usize = 1,
|
||||
max_len: usize = std.math.maxInt(usize),
|
||||
|
||||
/// The possible bytes that can be generated. If a byte is duplicated
|
||||
/// in the alphabet, it will be more likely to be generated. That's a
|
||||
/// side effect of the generator, not an intended use case.
|
||||
alphabet: ?[]const u8 = null,
|
||||
|
||||
/// Predefined alphabets that can be assigned to the `alphabet` field.
|
||||
pub const Alphabet = struct {
|
||||
/// ASCII letters (lower and upper case), digits, and punctuation.
/// Contains no space and no control characters.
pub const ascii = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+-=[]{}|;':\\\",./<>?`~";
|
||||
};
|
||||
|
||||
/// Wrap this byte generator in the common `Generator` interface.
///
/// The returned value keeps a pointer to `self` and forwards each
/// `Generator.next` call to `Bytes.next`.
pub fn generator(self: *Bytes) Generator {
    return Generator.init(self, next);
}
|
||||
|
||||
/// Fill a prefix of `buf` with generated bytes and return that prefix.
///
/// The length is drawn from `[min_len, max_len]` and then capped to
/// `buf.len`, so the result can be shorter than `min_len` (or empty) when
/// the buffer is too small. `min_len` must not exceed `max_len`.
///
/// Without an alphabet the result is raw random bytes. With an alphabet,
/// every output byte is an independently drawn alphabet entry; the
/// alphabet must not be empty.
pub fn next(self: *Bytes, buf: []u8) Generator.Error![]const u8 {
    const len = @min(
        self.rand.intRangeAtMostBiased(usize, self.min_len, self.max_len),
        buf.len,
    );
    const result = buf[0..len];

    const alphabet = self.alphabet orelse {
        self.rand.bytes(result);
        return result;
    };

    // An empty alphabet has nothing to draw from; fail loudly instead of
    // hitting a division/modulo by zero.
    std.debug.assert(alphabet.len > 0);

    // Draw an index over the full alphabet length rather than reducing a
    // random byte modulo the length. The modulo form over-represents the
    // first `256 % alphabet.len` entries whenever the length does not
    // divide 256, and can never reach entries past index 255.
    for (result) |*byte| {
        byte.* = alphabet[self.rand.uintLessThanBiased(usize, alphabet.len)];
    }

    return result;
}
|
||||
|
||||
// Smoke test: with default settings the generator yields a non-empty slice.
test "bytes" {
    var prng = std.Random.DefaultPrng.init(0);
    var bytes_gen: Bytes = .{ .rand = prng.random() };
    const iface = bytes_gen.generator();

    var storage: [256]u8 = undefined;
    const produced = try iface.next(&storage);
    try std.testing.expect(produced.len > 0);
}
|
42
src/synthetic/Generator.zig
Normal file
42
src/synthetic/Generator.zig
Normal file
@@ -0,0 +1,42 @@
|
||||
/// A common interface for all generators.
|
||||
const Generator = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
|
||||
/// For generators, this is the only error that is allowed to be
|
||||
/// returned by the next function.
|
||||
pub const Error = error{NoSpaceLeft};
|
||||
|
||||
/// The vtable for the generator.
|
||||
ptr: *anyopaque,
|
||||
nextFn: *const fn (ptr: *anyopaque, buf: []u8) Error![]const u8,
|
||||
|
||||
/// Build a type-erased `Generator` from a concrete implementation.
///
/// `pointer` must be a single-item pointer to a struct, and `nextFn` is
/// the implementation's `next` function taking that same pointer type.
/// Generator implementations call this; generator users normally only
/// call `next` on the result.
pub fn init(
    pointer: anytype,
    comptime nextFn: fn (ptr: @TypeOf(pointer), buf: []u8) Error![]const u8,
) Generator {
    const Ptr = @TypeOf(pointer);
    const info = @typeInfo(Ptr);
    assert(info == .pointer); // Must be a pointer
    assert(info.pointer.size == .one); // Must be a single-item pointer
    assert(@typeInfo(info.pointer.child) == .@"struct"); // Must point to a struct

    // Trampoline that restores the concrete pointer type before
    // forwarding to the implementation.
    const Erased = struct {
        fn call(ptr: *anyopaque, buf: []u8) Error![]const u8 {
            const typed: Ptr = @ptrCast(@alignCast(ptr));
            return try nextFn(typed, buf);
        }
    };

    return .{ .ptr = pointer, .nextFn = Erased.call };
}
|
||||
|
||||
/// Produce the next value by dispatching to the wrapped implementation.
/// Returns the slice of data that was written.
pub fn next(self: Generator, buf: []u8) Error![]const u8 {
    const written = try self.nextFn(self.ptr, buf);
    return written;
}
|
221
src/synthetic/Osc.zig
Normal file
221
src/synthetic/Osc.zig
Normal file
@@ -0,0 +1,221 @@
|
||||
/// Generates random terminal OSC requests.
|
||||
const Osc = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Generator = @import("Generator.zig");
|
||||
const Bytes = @import("Bytes.zig");
|
||||
|
||||
/// Valid OSC request kinds that can be generated. The relative chance of
/// each kind is controlled by `p_valid_kind`.
|
||||
pub const ValidKind = enum {
|
||||
/// Payload is `0;` followed by generated title bytes.
change_window_title,
|
||||
/// Payload is `133;A`, optionally followed by `;aid=` plus up to 16
/// generated bytes and/or `;redraw=` plus `1` or `0`.
prompt_start,
|
||||
/// Payload is exactly `133;B`.
prompt_end,
|
||||
};
|
||||
|
||||
/// Invalid OSC request kinds that can be generated. The relative chance
/// of each kind is controlled by `p_invalid_kind`.
|
||||
pub const InvalidKind = enum {
|
||||
/// Payload is nothing but generated bytes. Might even be valid, but
/// probably not.
|
||||
random,
|
||||
|
||||
/// Payload is the `133;` prefix followed by generated bytes: a good
/// prefix, but ultimately an invalid format.
|
||||
good_prefix,
|
||||
};
|
||||
|
||||
/// Random number generator.
|
||||
rand: std.Random,
|
||||
|
||||
/// Probability of a valid OSC sequence being generated.
|
||||
p_valid: f64 = 1.0,
|
||||
|
||||
/// Probabilities of specific valid or invalid OSC request kinds.
|
||||
/// The probabilities are weighted relative to each other, so they
|
||||
/// can sum greater than 1.0. A kind of weight 1.0 and a kind of
|
||||
/// weight 2.0 will have a 2:1 chance of the latter being selected.
|
||||
p_valid_kind: std.enums.EnumArray(ValidKind, f64) = .initFill(1.0),
|
||||
p_invalid_kind: std.enums.EnumArray(InvalidKind, f64) = .initFill(1.0),
|
||||
|
||||
/// Byte lookup table used for random OSC payload data.
///
/// Entry `i` is `i` itself, except that 0x07 (BEL) and 0x1B (ESC) map to
/// the next byte value so neither of them can ever appear in a payload.
const bytes_alphabet: []const u8 = alphabet: {
    var table: [256]u8 = undefined;
    for (&table, 0..) |*slot, value| {
        slot.* = switch (value) {
            0x07, 0x1B => @intCast(value + 1),
            else => @intCast(value),
        };
    }
    const frozen = table;
    break :alphabet &frozen;
};
|
||||
|
||||
/// Wrap this OSC generator in the common `Generator` interface.
///
/// The returned value keeps a pointer to `self` and forwards each
/// `Generator.next` call to `Osc.next`.
pub fn generator(self: *Osc) Generator {
    return Generator.init(self, next);
}
|
||||
|
||||
/// Generate the next OSC request into `buf` and return the written slice.
///
/// Every request is framed as `ESC ]` + payload + `BEL` (0x07). Whether
/// the payload is a valid or an invalid OSC request is decided by
/// `p_valid`; invalid requests keep the same framing and only differ in
/// the payload.
///
/// Returns `error.NoSpaceLeft` when `buf` is shorter than the 3 bytes
/// needed for the prefix and terminator.
pub fn next(self: *Osc, buf: []u8) Generator.Error![]const u8 {
    const framing_len = 3; // ESC, ']' and the trailing BEL
    if (buf.len < framing_len) return error.NoSpaceLeft;

    // The payload is generated straight into its final position, leaving
    // two bytes in front for the prefix and one at the end for BEL.
    const payload = try self.nextUnwrapped(buf[2 .. buf.len - 1]);
    const bel_index = 2 + payload.len;

    buf[0] = 0x1B; // ESC
    buf[1] = ']';
    buf[bel_index] = 0x07; // BEL
    return buf[0 .. bel_index + 1];
}
|
||||
|
||||
/// Generate only the OSC payload (no `ESC ]` prefix, no terminator).
///
/// First decides between a valid and an invalid request, then picks the
/// concrete kind using the matching weight table (`p_valid_kind` or
/// `p_invalid_kind`).
fn nextUnwrapped(self: *Osc, buf: []u8) Generator.Error![]const u8 {
    switch (self.chooseValidity()) {
        .valid => {
            const KindIndexer = @TypeOf(self.p_valid_kind).Indexer;
            const chosen = self.rand.weightedIndex(f64, &self.p_valid_kind.values);
            const kind = KindIndexer.keyForIndex(chosen);
            return try self.nextUnwrappedValidExact(buf, kind);
        },

        .invalid => {
            const KindIndexer = @TypeOf(self.p_invalid_kind).Indexer;
            const chosen = self.rand.weightedIndex(f64, &self.p_invalid_kind.values);
            const kind = KindIndexer.keyForIndex(chosen);
            return try self.nextUnwrappedInvalidExact(buf, kind);
        },
    }
}
|
||||
|
||||
/// Write the payload of a valid OSC request of kind `k` into `buf` and
/// return the written slice. Fails with `error.NoSpaceLeft` when a fixed
/// part of the payload does not fit.
fn nextUnwrappedValidExact(self: *const Osc, buf: []u8, k: ValidKind) Generator.Error![]const u8 {
    var fbs = std.io.fixedBufferStream(buf);
    const writer = fbs.writer();

    switch (k) {
        .change_window_title => {
            try writer.writeAll("0;"); // Set window title

            // The title is generated directly into the unused tail of the
            // stream's buffer; the stream position is advanced afterwards.
            var title_gen = self.bytes();
            const title = try title_gen.next(fbs.buffer[fbs.pos..]);
            try fbs.seekBy(@intCast(title.len));
        },

        .prompt_start => {
            try writer.writeAll("133;A"); // Start prompt

            // Optional `aid` option with at most 16 generated bytes.
            if (self.rand.boolean()) {
                var aid_gen = self.bytes();
                aid_gen.max_len = 16;
                try writer.writeAll(";aid=");
                const aid = try aid_gen.next(fbs.buffer[fbs.pos..]);
                try fbs.seekBy(@intCast(aid.len));
            }

            // Optional `redraw` option with a random boolean value.
            if (self.rand.boolean()) {
                try writer.writeAll(";redraw=");
                try writer.writeAll(if (self.rand.boolean()) "1" else "0");
            }
        },

        .prompt_end => try writer.writeAll("133;B"), // End prompt
    }

    return fbs.getWritten();
}
|
||||
|
||||
/// Write the payload of an invalid OSC request of kind `k` into `buf`
/// and return the written slice.
fn nextUnwrappedInvalidExact(
    self: *const Osc,
    buf: []u8,
    k: InvalidKind,
) Generator.Error![]const u8 {
    var payload_gen = self.bytes();

    switch (k) {
        // Nothing but generated bytes.
        .random => return try payload_gen.next(buf),

        // A recognizable prefix followed by generated bytes.
        .good_prefix => {
            var stream = std.io.fixedBufferStream(buf);
            try stream.writer().writeAll("133;");
            const tail = try payload_gen.next(buf[stream.pos..]);
            return buf[0 .. stream.pos + tail.len];
        },
    }
}
|
||||
|
||||
/// Create a `Bytes` generator that shares this generator's RNG and only
/// emits bytes from `bytes_alphabet`.
fn bytes(self: *const Osc) Bytes {
    const payload_gen: Bytes = .{
        .rand = self.rand,
        .alphabet = bytes_alphabet,
    };
    return payload_gen;
}
|
||||
|
||||
/// Choose whether to generate a valid or invalid OSC request based
/// on the validity probability.
///
/// `std.Random.float` yields a value in `[0, 1)`, so the draw is compared
/// with `>=`. That way `p_valid = 0.0` can never produce `.valid` (a draw
/// of exactly 0.0 still counts as invalid) and `p_valid = 1.0` can never
/// produce `.invalid`.
fn chooseValidity(self: *const Osc) Validity {
    const draw = self.rand.float(f64);
    return if (draw >= self.p_valid) .invalid else .valid;
}
|
||||
|
||||
const Validity = enum { valid, invalid };
|
||||
|
||||
/// A fixed seed we can use for our tests to avoid flakes.
|
||||
const test_seed = 0xC0FFEEEEEEEEEEEE;
|
||||
|
||||
// Smoke test: 50 requests through the `Generator` interface must all
// succeed with default settings.
test "OSC generator" {
    var prng = std.Random.DefaultPrng.init(test_seed);
    var osc: Osc = .{ .rand = prng.random() };
    const iface = osc.generator();

    var scratch: [4096]u8 = undefined;
    var round: usize = 0;
    while (round < 50) : (round += 1) {
        _ = try iface.next(&scratch);
    }
}
|
||||
|
||||
// With `p_valid = 1.0` every generated payload must be accepted by the
// terminal's OSC parser.
test "OSC generator valid" {
    const terminal = @import("../terminal/main.zig");

    var prng = std.Random.DefaultPrng.init(test_seed);
    var osc: Osc = .{
        .rand = prng.random(),
        .p_valid = 1.0,
    };

    var scratch: [256]u8 = undefined;
    for (0..50) |_| {
        const seq = try osc.next(&scratch);

        // Feed only the payload: skip the 2-byte prefix and the
        // 1-byte terminator.
        const payload = seq[2 .. seq.len - 1];
        var parser: terminal.osc.Parser = .{};
        for (payload) |byte| parser.next(byte);

        try std.testing.expect(parser.end(null) != null);
    }
}
|
||||
|
||||
// With `p_valid = 0.0` every generated payload must be rejected by the
// terminal's OSC parser.
test "OSC generator invalid" {
    const terminal = @import("../terminal/main.zig");

    var prng = std.Random.DefaultPrng.init(test_seed);
    var osc: Osc = .{
        .rand = prng.random(),
        .p_valid = 0.0,
    };

    var scratch: [256]u8 = undefined;
    for (0..50) |_| {
        const seq = try osc.next(&scratch);

        // Feed only the payload: skip the 2-byte prefix and the
        // 1-byte terminator.
        const payload = seq[2 .. seq.len - 1];
        var parser: terminal.osc.Parser = .{};
        for (payload) |byte| parser.next(byte);

        try std.testing.expect(parser.end(null) == null);
    }
}
|
103
src/synthetic/Utf8.zig
Normal file
103
src/synthetic/Utf8.zig
Normal file
@@ -0,0 +1,103 @@
|
||||
/// Generates UTF-8.
|
||||
///
|
||||
/// This doesn't yet generate multi-codepoint graphemes, but it
|
||||
/// has the ability to generate a custom distribution of UTF-8
|
||||
/// encoding lengths (1, 2, 3, or 4 bytes).
|
||||
const Utf8 = @This();
|
||||
|
||||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Generator = @import("Generator.zig");
|
||||
|
||||
/// Possible UTF-8 encoding lengths.
///
/// Each tag's integer value is the number of bytes in the encoding.
/// `next` relies on this: it reads the value with `@intFromEnum` to
/// compare a length against the remaining buffer space.
|
||||
pub const Utf8Len = enum(u3) {
|
||||
one = 1,
|
||||
two = 2,
|
||||
three = 3,
|
||||
four = 4,
|
||||
};
|
||||
|
||||
/// Random number generator.
|
||||
rand: std.Random,
|
||||
|
||||
/// The minimum and maximum length of the generated bytes. The maximum
|
||||
/// length will be capped to the length of the buffer passed in if the
|
||||
/// buffer length is smaller.
|
||||
min_len: usize = 1,
|
||||
max_len: usize = std.math.maxInt(usize),
|
||||
|
||||
/// Probability of a specific UTF-8 encoding length being generated.
|
||||
/// The probabilities are weighted relative to each other, so they
|
||||
/// can sum greater than 1.0. A length of weight 1.0 and a length
|
||||
/// of weight 2.0 will have a 2:1 chance of the latter being
|
||||
/// selected.
|
||||
///
|
||||
/// If a UTF-8 encoding of a chosen length can't fit into the remaining
|
||||
/// buffer, a smaller length will be chosen. For small buffers this may
|
||||
/// skew the distribution of lengths.
|
||||
p_length: std.enums.EnumArray(Utf8Len, f64) = .initFill(1.0),
|
||||
|
||||
/// Wrap this UTF-8 generator in the common `Generator` interface.
///
/// The returned value keeps a pointer to `self` and forwards each
/// `Generator.next` call to `Utf8.next`.
pub fn generator(self: *Utf8) Generator {
    return Generator.init(self, next);
}
|
||||
|
||||
/// Fill a prefix of `buf` with randomly generated UTF-8 and return it.
///
/// The byte length is drawn from `[min_len, max_len]` and capped to
/// `buf.len`. Codepoints are appended until exactly that many bytes have
/// been written. The encoded width of each codepoint is picked using the
/// `p_length` weights and reduced when fewer bytes remain than the picked
/// width.
pub fn next(self: *Utf8, buf: []u8) Generator.Error![]const u8 {
    const Indexer = @TypeOf(self.p_length).Indexer;

    const total = @min(
        self.rand.intRangeAtMostBiased(usize, self.min_len, self.max_len),
        buf.len,
    );
    const result = buf[0..total];

    var written: usize = 0;
    while (written < total) {
        const space = total - written;

        // Pick an encoded width, then shrink it to the remaining space
        // if it would not fit. `space` is at least 1 here, so the result
        // is always a valid `Utf8Len` value.
        const picked = Indexer.keyForIndex(
            self.rand.weightedIndex(f64, &self.p_length.values),
        );
        const width: usize = @min(@as(usize, @intFromEnum(picked)), space);
        const utf8_len: Utf8Len = @enumFromInt(@as(u3, @intCast(width)));

        // Draw a codepoint from the range that encodes to that width.
        const cp: u21 = switch (utf8_len) {
            .one => self.rand.intRangeAtMostBiased(u21, 0x00, 0x7F),
            .two => self.rand.intRangeAtMostBiased(u21, 0x80, 0x7FF),
            .three => self.rand.intRangeAtMostBiased(u21, 0x800, 0xFFFF),
            .four => self.rand.intRangeAtMostBiased(u21, 0x10000, 0x10FFFF),
        };

        const expected_len = std.unicode.utf8CodepointSequenceLength(cp) catch unreachable;
        assert(expected_len == @intFromEnum(utf8_len));

        const encoded_len = std.unicode.utf8Encode(
            cp,
            result[written..],
        ) catch |err| switch (err) {
            // The ranges above never exceed 0x10FFFF, so this would be
            // a bug in this function.
            error.CodepointTooLarge => unreachable,

            // The three-byte range contains the surrogate halves; when
            // one is drawn nothing is written and the loop tries again.
            error.Utf8CannotEncodeSurrogateHalf => continue,
        };
        written += encoded_len;
    }

    return result;
}
|
||||
|
||||
// Smoke test: default settings yield a non-empty, valid UTF-8 slice.
test "utf8" {
    var prng = std.Random.DefaultPrng.init(0);
    var utf8_gen: Utf8 = .{ .rand = prng.random() };
    const iface = utf8_gen.generator();

    var storage: [256]u8 = undefined;
    const produced = try iface.next(&storage);

    try std.testing.expect(produced.len > 0);
    try std.testing.expect(std.unicode.utf8ValidateSlice(produced));
}
|
23
src/synthetic/main.zig
Normal file
23
src/synthetic/main.zig
Normal file
@@ -0,0 +1,23 @@
|
||||
//! The synthetic package contains an abstraction for generating
|
||||
//! synthetic data. The motivating use case for this package is to
|
||||
//! generate synthetic data for benchmarking, but it may also expand
|
||||
//! to other use cases such as fuzzing (e.g. to generate a corpus
|
||||
//! rather than directly fuzzing).
|
||||
//!
|
||||
//! The generators in this package are typically not performant
|
||||
//! enough to be streamed in real time. They should instead be
|
||||
//! used to generate a large amount of data in a single go
|
||||
//! and then streamed from there.
|
||||
//!
|
||||
//! The generators are aimed for terminal emulation, but the package
|
||||
//! is not limited to that and we may want to extract this to a
|
||||
//! standalone package one day.
|
||||
|
||||
pub const Generator = @import("Generator.zig");
|
||||
pub const Bytes = @import("Bytes.zig");
|
||||
pub const Utf8 = @import("Utf8.zig");
|
||||
pub const Osc = @import("Osc.zig");
|
||||
|
||||
test {
|
||||
@import("std").testing.refAllDecls(@This());
|
||||
}
|
Reference in New Issue
Block a user