add utf8proc back for bench

This commit is contained in:
Mitchell Hashimoto
2024-02-08 13:21:36 -08:00
parent 7da82688b8
commit 4ae41579da
8 changed files with 112 additions and 0 deletions

37
pkg/utf8proc/build.zig Normal file
View File

@@ -0,0 +1,37 @@
const std = @import("std");
/// Build entry point for the utf8proc package.
///
/// Exposes a `utf8proc` Zig module rooted at `main.zig` and compiles the
/// upstream `utf8proc.c` (from the `utf8proc` dependency) into a static
/// library that is installed together with the upstream `.h` headers.
///
/// Returns an error if appending to the compiler flag list fails.
pub fn build(b: *std.Build) !void {
    const target = b.standardTargetOptions(.{});
    const optimize = b.standardOptimizeOption(.{});

    const module = b.addModule("utf8proc", .{ .root_source_file = .{ .path = "main.zig" } });

    const upstream = b.dependency("utf8proc", .{});
    const lib = b.addStaticLibrary(.{
        .name = "utf8proc",
        .target = target,
        .optimize = optimize,
    });
    lib.linkLibC();

    // Both the C library and the Zig module need the upstream root on the
    // include path so that `utf8proc.h` resolves.
    lib.addIncludePath(upstream.path(""));
    module.addIncludePath(upstream.path(""));

    var flags = std.ArrayList([]const u8).init(b.allocator);
    // Register cleanup before the first fallible call so the list is also
    // released when `append` fails.
    defer flags.deinit();
    try flags.append("-DUTF8PROC_EXPORTS");

    lib.addCSourceFiles(.{
        .dependency = upstream,
        .files = &.{"utf8proc.c"},
        .flags = flags.items,
    });

    // Install only the header files from the upstream root directory.
    lib.installHeadersDirectoryOptions(.{
        .source_dir = upstream.path(""),
        .install_dir = .header,
        .install_subdir = "",
        .include_extensions = &.{".h"},
    });

    b.installArtifact(lib);
}

View File

@@ -0,0 +1,11 @@
// Package manifest for the utf8proc wrapper package.
.{
.name = "utf8proc",
.version = "2.8.0",
// NOTE(review): the single empty path presumably includes the whole package
// directory in the package contents — confirm against the build.zig.zon docs.
.paths = .{""},
.dependencies = .{
// Upstream utf8proc source archive for the v2.8.0 tag, pinned by content hash.
.utf8proc = .{
.url = "https://github.com/JuliaStrings/utf8proc/archive/refs/tags/v2.8.0.tar.gz",
.hash = "1220056ce228a8c58f1fa66ab778f5c8965e62f720c1d30603c7d534cb7d8a605ad7",
},
},
}

3
pkg/utf8proc/c.zig Normal file
View File

@@ -0,0 +1,3 @@
// Translates the C header `utf8proc.h` and re-exports every declaration from
// it directly in this file's namespace.
pub usingnamespace @cImport({
@cInclude("utf8proc.h");
});

20
pkg/utf8proc/main.zig Normal file
View File

@@ -0,0 +1,20 @@
pub const c = @import("c.zig");
/// Returns the display width of `codepoint`, comparable to what
/// `wcwidth(codepoint)` reports, with one difference: non-printable
/// codepoints yield a width of 0 rather than the -1 that `wcwidth` uses.
pub fn charwidth(codepoint: u21) u8 {
    const width = c.utf8proc_charwidth(@intCast(codepoint));
    return @intCast(width);
}
/// Reports whether a grapheme break is permitted between the two consecutive
/// codepoints `cp1` and `cp2`, following the extended grapheme cluster rules
/// of UAX#29. `state` is passed through to utf8proc unchanged.
pub fn graphemeBreakStateful(cp1: u21, cp2: u21, state: *i32) bool {
    const break_allowed = c.utf8proc_grapheme_break_stateful(@intCast(cp1), @intCast(cp2), state);
    return break_allowed;
}
// Empty test block with no assertions.
// NOTE(review): presumably a placeholder so this file can act as a test root — confirm.
test {}