From 3dde6e2559e0aa67e04a6001485d87b80ed4c1dd Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Wed, 4 Mar 2026 20:37:16 -0800 Subject: [PATCH] terminal: bound link regex search work with Oniguruma retry limits Fixes #11177 Use per-search Oniguruma match params (retry_limit_in_search) in StringMap-backed link detection to avoid pathological backtracking hangs on very long lines. The units are ticks in the internal loop so it's kind of opaque, but this seems to still match some very long URLs. The test case in question was a 169K character line (which is now rejected). --- pkg/oniguruma/main.zig | 2 + pkg/oniguruma/match_param.zig | 23 ++++++++++ pkg/oniguruma/regex.zig | 79 ++++++++++++++++++++++++++++++----- src/terminal/StringMap.zig | 25 ++++++++++- 4 files changed, 117 insertions(+), 12 deletions(-) create mode 100644 pkg/oniguruma/match_param.zig diff --git a/pkg/oniguruma/main.zig b/pkg/oniguruma/main.zig index a8e415cfb..2541cc358 100644 --- a/pkg/oniguruma/main.zig +++ b/pkg/oniguruma/main.zig @@ -1,4 +1,5 @@ const initpkg = @import("init.zig"); +const match_param = @import("match_param.zig"); const regex = @import("regex.zig"); const region = @import("region.zig"); const types = @import("types.zig"); @@ -10,6 +11,7 @@ pub const errors = @import("errors.zig"); pub const init = initpkg.init; pub const deinit = initpkg.deinit; pub const Encoding = types.Encoding; +pub const MatchParam = match_param.MatchParam; pub const Regex = regex.Regex; pub const Region = region.Region; pub const Syntax = types.Syntax; diff --git a/pkg/oniguruma/match_param.zig b/pkg/oniguruma/match_param.zig new file mode 100644 index 000000000..b28258ff0 --- /dev/null +++ b/pkg/oniguruma/match_param.zig @@ -0,0 +1,23 @@ +const c = @import("c.zig").c; +const errors = @import("errors.zig"); +const Error = errors.Error; + +pub const MatchParam = struct { + value: *c.OnigMatchParam, + + pub fn init() !MatchParam { + const value = c.onig_new_match_param() orelse return Error.Memory; 
return .{ .value = value }; + } + + pub fn deinit(self: *MatchParam) void { + c.onig_free_match_param(self.value); + } + + pub fn setRetryLimitInSearch(self: *MatchParam, limit: usize) !void { + _ = try errors.convertError(c.onig_set_retry_limit_in_search_of_match_param( + self.value, + @intCast(limit), + )); + } +}; diff --git a/pkg/oniguruma/regex.zig b/pkg/oniguruma/regex.zig index a73c7fc10..fd920e01a 100644 --- a/pkg/oniguruma/regex.zig +++ b/pkg/oniguruma/regex.zig @@ -3,6 +3,7 @@ const c = @import("c.zig").c; const types = @import("types.zig"); const errors = @import("errors.zig"); const testEnsureInit = @import("testing.zig").ensureInit; +const MatchParam = @import("match_param.zig").MatchParam; const Region = @import("region.zig").Region; const Error = errors.Error; const ErrorInfo = errors.ErrorInfo; @@ -43,6 +44,17 @@ pub const Regex = struct { self: *Regex, str: []const u8, options: Option, + ) !Region { + return self.searchWithParam(str, options, null); + } + + /// Search an entire string for matches. This always returns a region + /// which may heap allocate (C allocator). + pub fn searchWithParam( + self: *Regex, + str: []const u8, + options: Option, + match_param: ?*MatchParam, ) !Region { var region: Region = .{}; @@ -51,7 +63,14 @@ pub const Regex = struct { // any errors to free that memory. 
errdefer region.deinit(); - _ = try self.searchAdvanced(str, 0, str.len, &region, options); + _ = try self.searchAdvancedWithParam( + str, + 0, + str.len, + &region, + options, + match_param, + ); return region; } @@ -64,15 +83,47 @@ pub const Regex = struct { region: *Region, options: Option, ) !usize { - const pos = try errors.convertError(c.onig_search( - self.value, - str.ptr, - str.ptr + str.len, - str.ptr + start, - str.ptr + end, - @ptrCast(region), - options.int(), - )); + return self.searchAdvancedWithParam( + str, + start, + end, + region, + options, + null, + ); + } + + /// onig_search_with_param directly + pub fn searchAdvancedWithParam( + self: *Regex, + str: []const u8, + start: usize, + end: usize, + region: *Region, + options: Option, + match_param: ?*MatchParam, + ) !usize { + const pos = try errors.convertError(if (match_param) |param| + c.onig_search_with_param( + self.value, + str.ptr, + str.ptr + str.len, + str.ptr + start, + str.ptr + end, + @ptrCast(region), + options.int(), + param.value, + ) + else + c.onig_search( + self.value, + str.ptr, + str.ptr + str.len, + str.ptr + start, + str.ptr + end, + @ptrCast(region), + options.int(), + )); return @intCast(pos); } @@ -90,4 +141,12 @@ test { try testing.expectEqual(@as(usize, 1), reg.count()); try testing.expectError(Error.Mismatch, re.search("hello", .{})); + + var match_param = try MatchParam.init(); + defer match_param.deinit(); + try match_param.setRetryLimitInSearch(1000); + + var reg_param = try re.searchWithParam("hello foo bar", .{}, &match_param); + defer reg_param.deinit(); + try testing.expectEqual(@as(usize, 1), reg_param.count()); } diff --git a/src/terminal/StringMap.zig b/src/terminal/StringMap.zig index f7d88d1c8..18dd7b19c 100644 --- a/src/terminal/StringMap.zig +++ b/src/terminal/StringMap.zig @@ -11,6 +11,12 @@ const Screen = @import("Screen.zig"); const Pin = @import("PageList.zig").Pin; const Allocator = std.mem.Allocator; +// Retry budget for StringMap regex searches. 
+// +// Units are Oniguruma retry steps (internal backtracking/retry counter), +// not bytes/characters/time. +const oni_search_retry_limit = 100_000; + string: [:0]const u8, map: []Pin, @@ -44,11 +50,26 @@ pub const SearchIterator = struct { pub fn next(self: *SearchIterator) !?Match { if (self.offset >= self.map.string.len) return null; - var region = self.regex.search( + // Use per-search match params so we can bound regex retry steps + // (Oniguruma's internal backtracking work counter). + var match_param = try oni.MatchParam.init(); + defer match_param.deinit(); + try match_param.setRetryLimitInSearch(oni_search_retry_limit); + + var region = self.regex.searchWithParam( self.map.string[self.offset..], .{}, + &match_param, ) catch |err| switch (err) { - error.Mismatch => { + // Retry/stack-limit errors mean we hit our work budget and + // aborted matching. + // For iterator callers this is equivalent to "no further matches". + error.Mismatch, + error.RetryLimitInMatchOver, + error.RetryLimitInSearchOver, + error.MatchStackLimitOver, + error.SubexpCallLimitInSearchOver, + => { self.offset = self.map.string.len; return null; },