add utf8proc back for bench

This commit is contained in:
Mitchell Hashimoto
2024-02-08 13:21:36 -08:00
parent 7da82688b8
commit 4ae41579da
8 changed files with 112 additions and 0 deletions

View File

@@ -45,6 +45,9 @@ const Mode = enum {
/// libc wcwidth
wcwidth,
/// Use utf8proc library to calculate the display width of each codepoint.
utf8proc,
/// Use ziglyph library to calculate the display width of each codepoint.
ziglyph,
@@ -76,6 +79,7 @@ pub fn main() !void {
switch (args.mode) {
.noop => try benchNoop(reader, buf),
.wcwidth => try benchWcwidth(reader, buf),
.utf8proc => try benchUtf8proc(reader, buf),
.ziglyph => try benchZiglyph(reader, buf),
.simd => try benchSimd(reader, buf),
}
@@ -124,6 +128,31 @@ noinline fn benchWcwidth(
}
}
/// Benchmark mode: feed the input through the UTF-8 decoder one byte at a
/// time and ask utf8proc for the display width of every decoded codepoint.
///
/// `reader` must expose `read(buf)` returning the number of bytes placed in
/// `buf`; a read of zero bytes ends the benchmark. `buf` is the scratch
/// buffer used for each read, and `buf[0]` is overwritten with the most
/// recently computed width.
noinline fn benchUtf8proc(
    reader: anytype,
    buf: []u8,
) !void {
    const utf8proc = @import("utf8proc");
    var decoder: UTF8Decoder = .{};

    while (true) {
        const len = try reader.read(buf);
        if (len == 0) return;

        // Deliberately naive scalar path: every byte goes through the
        // decoder individually, with no batching.
        for (buf[0..len]) |byte| {
            const maybe_cp, const consumed = decoder.next(byte);

            // The benchmark requires the decoder to report each byte as
            // consumed.
            assert(consumed);

            // No codepoint produced by this byte; move on to the next one.
            const cp = maybe_cp orelse continue;

            // Store the width back into the buffer so the width lookup is
            // not compiled away.
            buf[0] = @intCast(utf8proc.charwidth(cp));
        }
    }
}
noinline fn benchZiglyph(
reader: anytype,
buf: []u8,