diff --git a/pkg/oniguruma/main.zig b/pkg/oniguruma/main.zig index a8e415cfb..2541cc358 100644 --- a/pkg/oniguruma/main.zig +++ b/pkg/oniguruma/main.zig @@ -1,4 +1,5 @@ const initpkg = @import("init.zig"); +const match_param = @import("match_param.zig"); const regex = @import("regex.zig"); const region = @import("region.zig"); const types = @import("types.zig"); @@ -10,6 +11,7 @@ pub const errors = @import("errors.zig"); pub const init = initpkg.init; pub const deinit = initpkg.deinit; pub const Encoding = types.Encoding; +pub const MatchParam = match_param.MatchParam; pub const Regex = regex.Regex; pub const Region = region.Region; pub const Syntax = types.Syntax; diff --git a/pkg/oniguruma/match_param.zig b/pkg/oniguruma/match_param.zig new file mode 100644 index 000000000..b28258ff0 --- /dev/null +++ b/pkg/oniguruma/match_param.zig @@ -0,0 +1,23 @@ +const c = @import("c.zig").c; +const errors = @import("errors.zig"); +const Error = errors.Error; + +pub const MatchParam = struct { + value: *c.OnigMatchParam, + + pub fn init() !MatchParam { + const value = c.onig_new_match_param() orelse return Error.Memory; + return .{ .value = value }; + } + + pub fn deinit(self: *MatchParam) void { + c.onig_free_match_param(self.value); + } + + pub fn setRetryLimitInSearch(self: *MatchParam, limit: usize) !void { + _ = try errors.convertError(c.onig_set_retry_limit_in_search_of_match_param( + self.value, + @intCast(limit), + )); + } +}; diff --git a/pkg/oniguruma/regex.zig b/pkg/oniguruma/regex.zig index a73c7fc10..fd920e01a 100644 --- a/pkg/oniguruma/regex.zig +++ b/pkg/oniguruma/regex.zig @@ -3,6 +3,7 @@ const c = @import("c.zig").c; const types = @import("types.zig"); const errors = @import("errors.zig"); const testEnsureInit = @import("testing.zig").ensureInit; +const MatchParam = @import("match_param.zig").MatchParam; const Region = @import("region.zig").Region; const Error = errors.Error; const ErrorInfo = errors.ErrorInfo; @@ -43,6 +44,17 @@ pub const Regex = struct { self: *Regex, str: []const u8, options: Option, + ) !Region { + return self.searchWithParam(str, options, null); + } + + /// Search an entire string for matches. This always returns a region + /// which may heap allocate (C allocator). + pub fn searchWithParam( + self: *Regex, + str: []const u8, + options: Option, + match_param: ?*MatchParam, ) !Region { var region: Region = .{}; @@ -51,7 +63,14 @@ pub const Regex = struct { // any errors to free that memory. errdefer region.deinit(); - _ = try self.searchAdvanced(str, 0, str.len, ®ion, options); + _ = try self.searchAdvancedWithParam( + str, + 0, + str.len, + ®ion, + options, + match_param, + ); return region; } @@ -64,15 +83,47 @@ pub const Regex = struct { region: *Region, options: Option, ) !usize { - const pos = try errors.convertError(c.onig_search( - self.value, - str.ptr, - str.ptr + str.len, - str.ptr + start, - str.ptr + end, - @ptrCast(region), - options.int(), - )); + return self.searchAdvancedWithParam( + str, + start, + end, + region, + options, + null, + ); + } + + /// onig_search_with_param directly + pub fn searchAdvancedWithParam( + self: *Regex, + str: []const u8, + start: usize, + end: usize, + region: *Region, + options: Option, + match_param: ?*MatchParam, + ) !usize { + const pos = try errors.convertError(if (match_param) |param| + c.onig_search_with_param( + self.value, + str.ptr, + str.ptr + str.len, + str.ptr + start, + str.ptr + end, + @ptrCast(region), + options.int(), + param.value, + ) + else + c.onig_search( + self.value, + str.ptr, + str.ptr + str.len, + str.ptr + start, + str.ptr + end, + @ptrCast(region), + options.int(), + )); return @intCast(pos); } @@ -90,4 +141,12 @@ test { try testing.expectEqual(@as(usize, 1), reg.count()); try testing.expectError(Error.Mismatch, re.search("hello", .{})); + + var match_param = try MatchParam.init(); + defer match_param.deinit(); + try match_param.setRetryLimitInSearch(1000); + + var reg_param = try re.searchWithParam("hello foo bar", .{}, &match_param); + defer reg_param.deinit(); + try testing.expectEqual(@as(usize, 1), reg_param.count()); } diff --git a/src/terminal/StringMap.zig b/src/terminal/StringMap.zig index f7d88d1c8..18dd7b19c 100644 --- a/src/terminal/StringMap.zig +++ b/src/terminal/StringMap.zig @@ -11,6 +11,12 @@ const Screen = @import("Screen.zig"); const Pin = @import("PageList.zig").Pin; const Allocator = std.mem.Allocator; +// Retry budget for StringMap regex searches. +// +// Units are Oniguruma retry steps (internal backtracking/retry counter), +// not bytes/characters/time. +const oni_search_retry_limit = 100_000; + string: [:0]const u8, map: []Pin, @@ -44,11 +50,26 @@ pub const SearchIterator = struct { pub fn next(self: *SearchIterator) !?Match { if (self.offset >= self.map.string.len) return null; - var region = self.regex.search( + // Use per-search match params so we can bound regex retry steps + // (Oniguruma's internal backtracking work counter). + var match_param = try oni.MatchParam.init(); + defer match_param.deinit(); + try match_param.setRetryLimitInSearch(oni_search_retry_limit); + + var region = self.regex.searchWithParam( self.map.string[self.offset..], .{}, + &match_param, ) catch |err| switch (err) { - error.Mismatch => { + // Retry/stack-limit errors mean we hit our work budget and + // aborted matching. + // For iterator callers this is equivalent to "no further matches". + error.Mismatch, + error.RetryLimitInMatchOver, + error.RetryLimitInSearchOver, + error.MatchStackLimitOver, + error.SubexpCallLimitInSearchOver, + => { self.offset = self.map.string.len; return null; },