url: improve space in path handling

The space-segment patterns in the path regex (dotted_path_space_segments
and any_path_space_segments) greedily consume text after a space even
when we know that the text is the start of a new independent path (e.g.,
`/tmp/bar` in `/tmp/foo /tmp/bar`).

Fix: add two negative lookaheads after the space in both patterns:
- `(?!\.{0,2}\/)` → don't match if the next segment starts with `/`,
  `./`, or `../`
- `(?!~\/)` → don't match if the next segment starts with `~/`
This commit is contained in:
Ben Kircher
2026-02-16 07:05:12 +01:00
parent 2ac3c1f1da
commit c5488afc75

View File

@@ -65,11 +65,11 @@ const non_dotted_path_lookahead =
;
// Regex fragment: zero or more extra path segments, each introduced by a
// single space and required to end in `/` or `.`. The space must not be
// directly preceded by `:`, and the text after it must not begin with a
// `scheme://` prefix.
// NOTE(review): the two pattern lines below look like the before/after pair
// from a diff view — confirm only the second one is meant to remain. The
// second line additionally rejects a segment that starts with `/`, `./`,
// `../`, or `~/`, i.e. what looks like the start of a separate path.
const dotted_path_space_segments =
\\(?:(?<!:) (?!\w+:\/\/)[\w\-.~:\/?#@!$&*+;=%]*[\/.])*
\\(?:(?<!:) (?!\w+:\/\/)(?!\.{0,2}\/)(?!~\/)[\w\-.~:\/?#@!$&*+;=%]*[\/.])*
;
// Regex fragment: zero or more extra path segments, each introduced by a
// single space and followed by one or more path characters (no required
// trailing `/` or `.`). The space must not be directly preceded by `:`, and
// the text after it must not begin with a `scheme://` prefix.
// NOTE(review): the two pattern lines below look like the before/after pair
// from a diff view — confirm only the second one is meant to remain. The
// second line additionally rejects a segment that starts with `/`, `./`,
// `../`, or `~/`, i.e. what looks like the start of a separate path.
const any_path_space_segments =
\\(?:(?<!:) (?!\w+:\/\/)[\w\-.~:\/?#@!$&*+;=%]+)*
\\(?:(?<!:) (?!\w+:\/\/)(?!\.{0,2}\/)(?!~\/)[\w\-.~:\/?#@!$&*+;=%]+)*
;
// Branch 1: URLs with explicit schemes (http, mailto, ftp, etc.).
@@ -359,6 +359,19 @@ test "url regex" {
.input = "/tmp/test folder/file.txt",
.expect = "/tmp/test folder/file.txt",
},
.{
.input = "/tmp/test folder/file.txt",
.expect = "/tmp/test",
},
// Two space-separated absolute paths should match only the first
.{
.input = "/tmp/foo /tmp/bar",
.expect = "/tmp/foo",
},
.{
.input = "/tmp/foo.txt /tmp/bar.txt",
.expect = "/tmp/foo.txt",
},
// Bare relative file paths (no ./ or ../ prefix)
.{
.input = "src/config/url.zig",