mirror of
https://github.com/ghostty-org/ghostty.git
synced 2026-06-03 18:34:50 +00:00
more clickable file path fixes (#10619)
This pull request addresses some of the remaining issues when matching `~`, `$VAR`, `.directory/`, and embedded commas. It does not address issues with embedded line breaks. The PR is split in multiple commits carefully applying a set of changes to 1. make the big regex more composable / readable 2. update some doc strings 3. add more test cases for the issues mentioned 4. two simple commits, each fixing the issues Changes: - **url: refactor regex into documented branches** Break up the big monolithic URL and path regex into named sub-pattern constants and compose the final expression from three commented branches: - URLs with a scheme - absolute or dot-relative paths - bare relative paths This commit only breaks up the regex. It keeps the existing matching behavior unchanged. - **url: update top-level comment** - **url: carefully extend test cases** Extend existing test cases with `~`, `$VAR`, and bare .-prefixed paths and embedded `,` comma handling. See following issue comments: - https://github.com/ghostty-org/ghostty/pull/10570#issuecomment-3853842036 - https://github.com/ghostty-org/ghostty/issues/1972#issuecomment-3859329233 - https://github.com/ghostty-org/ghostty/issues/1972#issuecomment-3857881196 - **url: remove `,` from path_chars** Related to #1972 Fixes an issue when paths have embedded comma, e.g.: shared/src/foo/SomeItem.m:12, shared/src/ with path_chars greedily consuming the rest of the string. Now file path matching stops at comma. Scheme URLs are unchanged and still using the comma. - **url: fix matching `~`, `$VAR`, `.directory/`** Related to #1972 This commit adds three new alternatives for `rooted_or_relative_path_prefix`: - `~/` - `$VAR` and - `.local/`, `.config/` etc. 
for dot-prefixed directory names. Remaining commits fix edge cases one by one: - **url: fix mid-string dot partial matches** `"foo.local/share"` (was partial match) → now matches fully - **url: fix $-numeric character matches** `"$10/$20"` → no match - **url: fix partial match of mid string $-variable** `"foo/$BAR/baz"` (was partial match) → now matches fully - **url: fix incomplete $-numeric behavior** `"$10/bar.txt"` (was partial match) → now does not match at all
This commit is contained in:
@@ -1,15 +1,17 @@
|
||||
const std = @import("std");
|
||||
const oni = @import("oniguruma");
|
||||
|
||||
/// Default URL regex. This is used to detect URLs in terminal output.
|
||||
/// Default URL/path regex. This is used to detect URLs and file paths in
|
||||
/// terminal output.
|
||||
///
|
||||
/// This is here in the config package because one day the matchers will be
|
||||
/// configurable and this will be a default.
|
||||
///
|
||||
/// This regex is liberal in what it accepts after the scheme, with exceptions
|
||||
/// for URLs ending with . or ). Although such URLs are perfectly valid, it is
|
||||
/// common for text to contain URLs surrounded by parentheses (such as in
|
||||
/// Markdown links) or at the end of sentences. Therefore, this regex excludes
|
||||
/// them as follows:
|
||||
/// For scheme URLs, this regex is liberal in what it accepts after the scheme,
|
||||
/// with exceptions for URLs ending with . or ). Although such URLs are
|
||||
/// perfectly valid, it is common for text to contain URLs surrounded by
|
||||
/// parentheses (such as in Markdown links) or at the end of sentences.
|
||||
/// Therefore, this regex excludes them as follows:
|
||||
///
|
||||
/// 1. Do not match URLs ending with .
|
||||
/// 2. Do not match URLs ending with ), except for ones which contain a (
|
||||
@@ -22,12 +24,6 @@ const oni = @import("oniguruma");
|
||||
///
|
||||
/// There are many complicated cases where these heuristics break down, but
|
||||
/// handling them well requires a non-regex approach.
|
||||
pub const regex =
|
||||
"(?:" ++ url_schemes ++
|
||||
\\)(?:
|
||||
++ ipv6_url_pattern ++
|
||||
\\|[\w\-.~:/?#@!$&*+,;=%]+(?:[\(\[]\w*[\)\]])?)+(?<![,.])|(?:\.\.\/|\.\/|(?<!\w)\/)(?:(?=[\w\-.~:\/?#@!$&*+,;=%]*\.)[\w\-.~:\/?#@!$&*+,;=%]+(?: [\w\-.~:\/?#@!$&*+,;=%]*[\/.])*(?: +(?= *$))?|(?![\w\-.~:\/?#@!$&*+,;=%]*\.)[\w\-.~:\/?#@!$&*+,;=%]+(?: [\w\-.~:\/?#@!$&*+,;=%]+)*(?: +(?= *$))?)|[\w][\w\-.]*\/(?=[\w\-.~:\/?#@!$&*+,;=%]*\.)[\w\-.~:\/?#@!$&*+,;=%]+(?: [\w\-.~:\/?#@!$&*+,;=%]*[\/.])*(?: +(?= *$))?
|
||||
;
|
||||
const url_schemes =
// Alternation of the literal scheme prefixes that start a scheme URL.
// The alternatives are not grouped here; scheme_url_branch wraps this
// constant in a non-capturing group before appending the URL body.
|
||||
\\https?://|mailto:|ftp://|file:|ssh:|git://|ssh://|tel:|magnet:|ipfs://|ipns://|gemini://|gopher://|news:
|
||||
;
|
||||
@@ -36,6 +32,95 @@ const ipv6_url_pattern =
|
||||
\\(?:\[[:0-9a-fA-F]+(?:[:0-9a-fA-F]*)+\](?::[0-9]+)?)
|
||||
;
|
||||
|
||||
// Character class matching a single character of a scheme-URL body: word
// characters plus `- . ~ : / ? # @ ! $ & * + , ; = %`. Unlike path_chars,
// this class includes the comma.
const scheme_url_chars =
|
||||
\\[\w\-.~:/?#@!$&*+,;=%]
|
||||
;
|
||||
|
||||
// Character class matching a single character of a file path: word
// characters plus `- . ~ : / ? # @ ! $ & * + ; = %`. The comma is
// deliberately absent so that path matching stops at a comma.
const path_chars =
|
||||
\\[\w\-.~:\/?#@!$&*+;=%]
|
||||
;
|
||||
|
||||
// Optional suffix: an opening `(` or `[`, zero or more word characters, and
// a closing `)` or `]`. The opening and closing bracket kinds are not
// required to match each other.
const optional_bracketed_word_suffix =
|
||||
\\(?:[\(\[]\w*[\)\]])?
|
||||
;
|
||||
|
||||
// Negative lookbehind: the character immediately before this position must
// not be a comma or a period. Placed at the end of a branch, it keeps the
// match from ending on `,` or `.`.
const no_trailing_punctuation =
|
||||
\\(?<![,.])
|
||||
;
|
||||
|
||||
// Negative lookbehind: the character immediately before this position must
// not be a colon, so a path match cannot end on `:`.
const no_trailing_colon =
|
||||
\\(?<!:)
|
||||
;
|
||||
|
||||
// Optionally consumes one or more spaces, but only when nothing except
// further spaces remains before the `$` anchor.
const trailing_spaces_at_eol =
|
||||
\\(?: +(?= *$))?
|
||||
;
|
||||
|
||||
// Positive lookahead: starting at the current position, some run of path
// characters is followed by a `.`. The character class is spelled out
// inline and is the same set as path_chars; keep the two in sync.
const dotted_path_lookahead =
|
||||
\\(?=[\w\-.~:\/?#@!$&*+;=%]*\.)
|
||||
;
|
||||
|
||||
// Negative lookahead, the inverse of dotted_path_lookahead: no run of path
// characters starting at the current position is followed by a `.`. The
// character class is the same set as path_chars, spelled out inline.
const non_dotted_path_lookahead =
|
||||
\\(?![\w\-.~:\/?#@!$&*+;=%]*\.)
|
||||
;
|
||||
|
||||
// Zero or more space-separated continuation segments for the dotted case.
// Each segment is a single space that is not preceded by `:` and is not
// followed by `word://` (the start of a scheme URL), then zero or more path
// characters, then a `/` or `.`.
const dotted_path_space_segments =
|
||||
\\(?:(?<!:) (?!\w+:\/\/)[\w\-.~:\/?#@!$&*+;=%]*[\/.])*
|
||||
;
|
||||
|
||||
// Zero or more space-separated continuation segments for the undotted case.
// Each segment is a single space that is not preceded by `:` and is not
// followed by `word://` (the start of a scheme URL), then one or more path
// characters. Unlike dotted_path_space_segments, a segment does not need to
// end in `/` or `.`.
const any_path_space_segments =
|
||||
\\(?:(?<!:) (?!\w+:\/\/)[\w\-.~:\/?#@!$&*+;=%]+)*
|
||||
;
|
||||
|
||||
// Branch 1: URLs with explicit schemes (http, mailto, ftp, etc.).
// Shape: one of url_schemes, then one or more repetitions of either an
// ipv6_url_pattern or a run of scheme_url_chars with an optional bracketed
// word suffix. The match may not end in `,` or `.`.
|
||||
const scheme_url_branch =
|
||||
"(?:" ++ url_schemes ++ ")" ++
|
||||
"(?:" ++ ipv6_url_pattern ++ "|" ++ scheme_url_chars ++ "+" ++ optional_bracketed_word_suffix ++ ")+" ++
|
||||
no_trailing_punctuation;
|
||||
|
||||
// Prefixes that start a Branch 2 path. Alternatives, in order:
//   1. `../`
//   2. `./`
//   3. `~/` not preceded by a word character
//   4. zero or more leading `name/` segments followed by `$NAME/`, where the
//      `$` is not preceded by a word character and NAME starts with a letter
//      or underscore (so `$10/` does not qualify)
//   5. `.name/`, a dot-prefixed directory such as `.config/`
//   6. `/` not preceded by a word character, `~` or `/`, and not followed
//      by another `/`
const rooted_or_relative_path_prefix =
|
||||
\\(?:\.\.\/|\.\/|(?<!\w)~\/|(?:[\w][\w\-.]*\/)*(?<!\w)\$[A-Za-z_]\w*\/|\.[\w][\w\-.]*\/|(?<![\w~\/])\/(?!\/))
|
||||
;
|
||||
|
||||
// Branch 2: Absolute paths and dot-relative paths (/, ./, ../), as well as
// the ~/, $VAR/ and .dir/ prefixes accepted by
// rooted_or_relative_path_prefix.
|
||||
// A dotted segment is treated as file-like, while the undotted case stays
|
||||
// broad to capture directory-like paths with spaces.
// Both alternatives forbid a trailing `:` and may absorb trailing spaces
// when only spaces remain.
|
||||
const rooted_or_relative_path_branch =
|
||||
rooted_or_relative_path_prefix ++
|
||||
"(?:" ++
|
||||
// Dotted alternative: the upcoming run of path characters contains a `.`.
// Continuation segments after a space must end in `/` or `.`.
dotted_path_lookahead ++
|
||||
path_chars ++ "+" ++
|
||||
dotted_path_space_segments ++
|
||||
no_trailing_colon ++
|
||||
trailing_spaces_at_eol ++
|
||||
"|" ++
|
||||
// Undotted alternative: no `.` in the upcoming run of path characters.
// Continuation segments after a space may be any run of path characters.
non_dotted_path_lookahead ++
|
||||
path_chars ++ "+" ++
|
||||
any_path_space_segments ++
|
||||
no_trailing_colon ++
|
||||
trailing_spaces_at_eol ++
|
||||
")";
|
||||
|
||||
// Branch 3: Bare relative paths such as src/config/url.zig.
// The prefix is the first `name/` segment: a word character, then any run
// of word characters, `-` or `.`, then `/`. Two lookbehinds guard the start.
// It must not be preceded by `$` plus zero or more digits, so `$10/...` is
// rejected, and it must not be preceded by a word character, so a match
// cannot begin mid-word.
|
||||
const bare_relative_path_prefix =
|
||||
\\(?<!\$\d*)(?<!\w)[\w][\w\-.]*\/
|
||||
;
|
||||
|
||||
// Full Branch 3 pattern. The dotted lookahead is checked at the very start
// of the match, so a bare relative path only matches when the run of path
// characters beginning at its first character contains a `.`. The rest is
// the bare prefix, one or more path characters, then optional
// space-separated segments that end in `/` or `.`. The match may not end in
// `:` and may absorb trailing spaces when only spaces remain.
const bare_relative_path_branch =
|
||||
dotted_path_lookahead ++
|
||||
bare_relative_path_prefix ++
|
||||
path_chars ++ "+" ++
|
||||
dotted_path_space_segments ++
|
||||
no_trailing_colon ++
|
||||
trailing_spaces_at_eol;
|
||||
|
||||
/// Default URL/path regex, composed as an alternation of three branches in
/// this order: scheme URLs, rooted or prefix-relative paths, and bare
/// relative paths.
pub const regex =
|
||||
scheme_url_branch ++
|
||||
"|" ++
|
||||
rooted_or_relative_path_branch ++
|
||||
"|" ++
|
||||
bare_relative_path_branch;
|
||||
|
||||
test "url regex" {
|
||||
const testing = std.testing;
|
||||
|
||||
@@ -77,7 +162,7 @@ test "url regex" {
|
||||
.expect = "https://example.com",
|
||||
},
|
||||
.{
|
||||
.input = "Link trailing colon https://example.com, more text.",
|
||||
.input = "Link trailing comma https://example.com, more text.",
|
||||
.expect = "https://example.com",
|
||||
},
|
||||
.{
|
||||
@@ -148,6 +233,10 @@ test "url regex" {
|
||||
.input = "match git://example.com git links",
|
||||
.expect = "git://example.com",
|
||||
},
|
||||
.{
|
||||
.input = "/tmp/test.txt http://www.google.com",
|
||||
.expect = "/tmp/test.txt",
|
||||
},
|
||||
.{
|
||||
.input = "match tel:+18005551234 tel links",
|
||||
.expect = "tel:+18005551234",
|
||||
@@ -291,6 +380,89 @@ test "url regex" {
|
||||
.input = "some-pkg/src/file.txt more text",
|
||||
.expect = "some-pkg/src/file.txt",
|
||||
},
|
||||
// a comma ends the path, so only the part before the comma matches
|
||||
.{
|
||||
.input = "src/foo.c,baz.txt",
|
||||
.expect = "src/foo.c",
|
||||
},
|
||||
.{
|
||||
.input = "~/foo/bar.txt",
|
||||
.expect = "~/foo/bar.txt",
|
||||
},
|
||||
.{
|
||||
.input = "open ~/Documents/notes.md please",
|
||||
.expect = "~/Documents/notes.md",
|
||||
},
|
||||
.{
|
||||
.input = "~/.config/ghostty/config",
|
||||
.expect = "~/.config/ghostty/config",
|
||||
},
|
||||
.{
|
||||
.input = "directory: ~/src/ghostty-org/ghostty",
|
||||
.expect = "~/src/ghostty-org/ghostty",
|
||||
},
|
||||
.{
|
||||
.input = "$HOME/src/config/url.zig",
|
||||
.expect = "$HOME/src/config/url.zig",
|
||||
},
|
||||
.{
|
||||
.input = "project dir: $PWD/src/ghostty/main.zig",
|
||||
.expect = "$PWD/src/ghostty/main.zig",
|
||||
},
|
||||
// $VAR mid-path should match fully, not partially from the $
|
||||
.{
|
||||
.input = "foo/$BAR/baz",
|
||||
.expect = "foo/$BAR/baz",
|
||||
},
|
||||
.{
|
||||
.input = ".foo/bar/$VAR",
|
||||
.expect = ".foo/bar/$VAR",
|
||||
},
|
||||
.{
|
||||
.input = ".config/ghostty/config",
|
||||
.expect = ".config/ghostty/config",
|
||||
},
|
||||
.{
|
||||
.input = "loaded from .local/share/ghostty/state.db now",
|
||||
.expect = ".local/share/ghostty/state.db",
|
||||
},
|
||||
.{
|
||||
.input = "../some/where",
|
||||
.expect = "../some/where",
|
||||
},
|
||||
// comma-separated file paths
|
||||
.{
|
||||
.input = " - shared/src/foo/SomeItem.m:12, shared/src/",
|
||||
.expect = "shared/src/foo/SomeItem.m:12",
|
||||
},
|
||||
// a path with a mid-string dot should match in full, not partially
|
||||
.{
|
||||
.input = "foo.local/share",
|
||||
.expect = "foo.local/share",
|
||||
},
|
||||
// numeric directory should match fully
|
||||
.{
|
||||
.input = "2024/report.txt",
|
||||
.expect = "2024/report.txt",
|
||||
},
|
||||
// comma should stop matching in spaced path segments
|
||||
.{
|
||||
.input = "./foo bar,baz",
|
||||
.expect = "./foo bar",
|
||||
},
|
||||
.{
|
||||
.input = "/tmp/foo bar,baz",
|
||||
.expect = "/tmp/foo bar",
|
||||
},
|
||||
// trailing colon should not be part of the path
|
||||
.{
|
||||
.input = "./.config/ghostty: Needs upstream (main)",
|
||||
.expect = "./.config/ghostty",
|
||||
},
|
||||
.{
|
||||
.input = "./Downloads: Operation not permitted",
|
||||
.expect = "./Downloads",
|
||||
},
|
||||
};
|
||||
|
||||
for (cases) |case| {
|
||||
@@ -306,10 +478,23 @@ test "url regex" {
|
||||
try testing.expectEqualStrings(case.expect, match);
|
||||
}
|
||||
|
||||
// Bare relative paths without any dot should not match as file paths
|
||||
const no_match_cases = [_][]const u8{
|
||||
// bare relative paths without any dot should not match as file paths
|
||||
"input/output",
|
||||
"foo/bar",
|
||||
// $-numeric character should not match
|
||||
"$10/bar",
|
||||
"$10/$20",
|
||||
"$10/bar.txt",
|
||||
// comma should not let dot detection look past it
|
||||
"foo/bar,baz.txt",
|
||||
// $VAR should not match mid-word
|
||||
"foo$BAR/baz.txt",
|
||||
// ~ should not match mid-word
|
||||
"foo~/bar.txt",
|
||||
// double-slash comments are not paths
|
||||
"// foo bar",
|
||||
"//foo",
|
||||
};
|
||||
for (no_match_cases) |input| {
|
||||
var result = re.search(input, .{});
|
||||
|
||||
Reference in New Issue
Block a user