terminal: bound link regex search work with Oniguruma retry limits

Fixes #11177

Use per-search Oniguruma match params (retry_limit_in_search) in
StringMap-backed link detection to avoid pathological backtracking hangs
on very long lines.

The units are ticks in the internal loop, so it's kind of opaque, but
this seems to still match some very long URLs. The test case in question
was a 169K-character line (which is now rejected).
This commit is contained in:
Mitchell Hashimoto
2026-03-04 20:37:16 -08:00
parent dfa968d932
commit 3dde6e2559
4 changed files with 117 additions and 12 deletions

View File

@@ -1,4 +1,5 @@
const initpkg = @import("init.zig");
const match_param = @import("match_param.zig");
const regex = @import("regex.zig");
const region = @import("region.zig");
const types = @import("types.zig");
@@ -10,6 +11,7 @@ pub const errors = @import("errors.zig");
pub const init = initpkg.init;
pub const deinit = initpkg.deinit;
pub const Encoding = types.Encoding;
pub const MatchParam = match_param.MatchParam;
pub const Regex = regex.Regex;
pub const Region = region.Region;
pub const Syntax = types.Syntax;

View File

@@ -0,0 +1,23 @@
const c = @import("c.zig").c;
const errors = @import("errors.zig");
const Error = errors.Error;
/// Owning wrapper around an Oniguruma `OnigMatchParam`, the object that
/// carries per-search settings (such as the retry limit) into a search.
///
/// Create with `init` and release with `deinit`.
pub const MatchParam = struct {
    /// Underlying Oniguruma handle; obtained in `init`, freed in `deinit`.
    value: *c.OnigMatchParam,

    /// Allocate a new match param.
    ///
    /// Returns `Error.Memory` if Oniguruma returns a null handle.
    pub fn init() !MatchParam {
        const value = c.onig_new_match_param() orelse return Error.Memory;
        return .{ .value = value };
    }

    /// Free the underlying handle. The value must not be used afterwards.
    pub fn deinit(self: *MatchParam) void {
        c.onig_free_match_param(self.value);
    }

    /// Set the retry limit used for searches performed with this param.
    ///
    /// `limit` values that do not fit in the C `unsigned long` parameter
    /// are clamped to the largest representable value rather than tripping
    /// an out-of-range cast. Any error code reported by Oniguruma is
    /// returned as a Zig error.
    pub fn setRetryLimitInSearch(self: *MatchParam, limit: usize) !void {
        const std = @import("std");

        // The C API takes `unsigned long`, which is narrower than `usize`
        // on LLP64 targets (e.g. 64-bit Windows). Saturate instead of
        // using an unchecked narrowing cast.
        const c_limit: c_ulong = std.math.cast(c_ulong, limit) orelse
            std.math.maxInt(c_ulong);

        _ = try errors.convertError(c.onig_set_retry_limit_in_search_of_match_param(
            self.value,
            c_limit,
        ));
    }
};

View File

@@ -3,6 +3,7 @@ const c = @import("c.zig").c;
const types = @import("types.zig");
const errors = @import("errors.zig");
const testEnsureInit = @import("testing.zig").ensureInit;
const MatchParam = @import("match_param.zig").MatchParam;
const Region = @import("region.zig").Region;
const Error = errors.Error;
const ErrorInfo = errors.ErrorInfo;
@@ -43,6 +44,17 @@ pub const Regex = struct {
self: *Regex,
str: []const u8,
options: Option,
) !Region {
return self.searchWithParam(str, options, null);
}
/// Search an entire string for matches. This always returns a region
/// which may heap allocate (C allocator).
pub fn searchWithParam(
self: *Regex,
str: []const u8,
options: Option,
match_param: ?*MatchParam,
) !Region {
var region: Region = .{};
@@ -51,7 +63,14 @@ pub const Regex = struct {
// any errors to free that memory.
errdefer region.deinit();
_ = try self.searchAdvanced(str, 0, str.len, &region, options);
_ = try self.searchAdvancedWithParam(
str,
0,
str.len,
&region,
options,
match_param,
);
return region;
}
@@ -64,15 +83,47 @@ pub const Regex = struct {
region: *Region,
options: Option,
) !usize {
const pos = try errors.convertError(c.onig_search(
self.value,
str.ptr,
str.ptr + str.len,
str.ptr + start,
str.ptr + end,
@ptrCast(region),
options.int(),
));
return self.searchAdvancedWithParam(
str,
start,
end,
region,
options,
null,
);
}
/// Thin wrapper over the Oniguruma search entry points.
///
/// When `match_param` is non-null this calls `onig_search_with_param`
/// with the given parameters; otherwise it calls plain `onig_search`.
/// `start` and `end` are byte offsets into `str` delimiting the search
/// range. Matches are written into `region`.
///
/// Returns the position reported by Oniguruma as a `usize`. Error codes
/// from the C call are converted to Zig errors and propagated.
pub fn searchAdvancedWithParam(
    self: *Regex,
    str: []const u8,
    start: usize,
    end: usize,
    region: *Region,
    options: Option,
    match_param: ?*MatchParam,
) !usize {
    // Run whichever C entry point applies and keep the raw status code.
    const rc = if (match_param) |param| c.onig_search_with_param(
        self.value,
        str.ptr,
        str.ptr + str.len,
        str.ptr + start,
        str.ptr + end,
        @ptrCast(region),
        options.int(),
        param.value,
    ) else c.onig_search(
        self.value,
        str.ptr,
        str.ptr + str.len,
        str.ptr + start,
        str.ptr + end,
        @ptrCast(region),
        options.int(),
    );

    // Error codes become Zig errors here; what remains is the position.
    const pos = try errors.convertError(rc);
    return @intCast(pos);
}
@@ -90,4 +141,12 @@ test {
try testing.expectEqual(@as(usize, 1), reg.count());
try testing.expectError(Error.Mismatch, re.search("hello", .{}));
var match_param = try MatchParam.init();
defer match_param.deinit();
try match_param.setRetryLimitInSearch(1000);
var reg_param = try re.searchWithParam("hello foo bar", .{}, &match_param);
defer reg_param.deinit();
try testing.expectEqual(@as(usize, 1), reg_param.count());
}

View File

@@ -11,6 +11,12 @@ const Screen = @import("Screen.zig");
const Pin = @import("PageList.zig").Pin;
const Allocator = std.mem.Allocator;
// Retry budget applied to each StringMap regex search.
//
// Units are Oniguruma retry steps (internal backtracking/retry counter),
// not bytes/characters/time. A search that runs out of budget fails with
// a retry-limit error, which the search iterator in this file treats as
// "no further matches".
const oni_search_retry_limit = 100_000;
string: [:0]const u8,
map: []Pin,
@@ -44,11 +50,26 @@ pub const SearchIterator = struct {
pub fn next(self: *SearchIterator) !?Match {
if (self.offset >= self.map.string.len) return null;
var region = self.regex.search(
// Use per-search match params so we can bound regex retry steps
// (Oniguruma's internal backtracking work counter).
var match_param = try oni.MatchParam.init();
defer match_param.deinit();
try match_param.setRetryLimitInSearch(oni_search_retry_limit);
var region = self.regex.searchWithParam(
self.map.string[self.offset..],
.{},
&match_param,
) catch |err| switch (err) {
error.Mismatch => {
// Retry/stack-limit errors mean we hit our work budget and
// aborted matching.
// For iterator callers this is equivalent to "no further matches".
error.Mismatch,
error.RetryLimitInMatchOver,
error.RetryLimitInSearchOver,
error.MatchStackLimitOver,
error.SubexpCallLimitInSearchOver,
=> {
self.offset = self.map.string.len;
return null;
},