refactor(path)!: support RFC3986 in path_with_url() #36564

Problem:
Nvim does not recognize URI schemes that contain numeric characters. While rare, there
are schemes that contain digits (e.g. [ed2k://](https://en.wikipedia.org/wiki/Ed2k_URI_scheme))
or characters like `+` (e.g. `svn+ssh`). I rely on such a scheme in
[distant.nvim](https://github.com/chipsenkbeil/distant.nvim) to support
multiple, distinct connections, using `distant+1234://` as the scheme.
Otherwise, if you open a file with the same name & path on two different
machines from the same Nvim instance, their buffer names will conflict
when just using `distant://`.

Solution:
Add full support for detecting the URI scheme per
[RFC3986](https://www.rfc-editor.org/rfc/rfc3986#section-3.1).
This commit is contained in:
Chip Senkbeil
2025-11-15 15:40:01 -06:00
committed by GitHub
parent b3c099b1dd
commit c123b7245c
4 changed files with 51 additions and 16 deletions

View File

@@ -71,6 +71,8 @@ EDITOR
- |i_CTRL-R| inserts named/clipboard registers (A-Z,a-z,0-9+) literally, like
pasting instead of like user input. Improves performance, avoids broken
formatting. To get the old behavior you can use `<C-R>=@x`.
- Buffer names now follow RFC3986 for detecting a scheme, meaning
"svn+ssh", "ed2k", and "iris.xpc" are now treated as URI schemes.
EVENTS

View File

@@ -1749,10 +1749,6 @@ int path_with_url(const char *fname)
{
const char *p;
// We accept alphabetic characters and a dash in scheme part.
// RFC 3986 allows for more, but it increases the risk of matching
// non-URL text.
// first character must be alpha
if (!ASCII_ISALPHA(*fname)) {
return 0;
@@ -1762,11 +1758,11 @@ int path_with_url(const char *fname)
return 0;
}
// check body: alpha or dash
for (p = fname + 1; (ASCII_ISALPHA(*p) || (*p == '-')); p++) {}
// check body: (alpha, digit, '+', '-', '.') following RFC3986
for (p = fname + 1; (ASCII_ISALNUM(*p) || (*p == '+') || (*p == '-') || (*p == '.')); p++) {}
// check last char is not a dash
if (p[-1] == '-') {
// check last char is not '+', '-', or '.'
if ((p[-1] == '+') || (p[-1] == '-') || (p[-1] == '.')) {
return 0;
}

View File

@@ -445,12 +445,18 @@ func Test_buffer_scheme()
set noshellslash
%bwipe!
let bufnames = [
\ #{id: 'ssb0', name: 'test://xyz/foo/ssb0' , match: 1},
\ #{id: 'ssb1', name: 'test+abc://xyz/foo/ssb1', match: 0},
\ #{id: 'ssb2', name: 'test_abc://xyz/foo/ssb2', match: 0},
\ #{id: 'ssb3', name: 'test-abc://xyz/foo/ssb3', match: 1},
\ #{id: 'ssb4', name: '-test://xyz/foo/ssb4' , match: 0},
\ #{id: 'ssb5', name: 'test-://xyz/foo/ssb5' , match: 0},
\ #{id: 'ssb0' , name: 'test://xyz/foo/ssb0' , match: 1},
\ #{id: 'ssb1' , name: 'test1234://xyz/foo/ssb1', match: 1},
\ #{id: 'ssb2' , name: 'test+abc://xyz/foo/ssb2', match: 1},
\ #{id: 'ssb3' , name: 'test-abc://xyz/foo/ssb3', match: 1},
\ #{id: 'ssb4' , name: 'test.abc://xyz/foo/ssb4', match: 1},
\ #{id: 'ssb5' , name: 'test_abc://xyz/foo/ssb5', match: 0},
\ #{id: 'ssb6' , name: '+test://xyz/foo/ssb6' , match: 0},
\ #{id: 'ssb7' , name: 'test+://xyz/foo/ssb7' , match: 0},
\ #{id: 'ssb8' , name: '-test://xyz/foo/ssb8' , match: 0},
\ #{id: 'ssb9' , name: 'test-://xyz/foo/ssb9' , match: 0},
\ #{id: 'ssb10', name: '.test://xyz/foo/ssb10' , match: 0},
\ #{id: 'ssb11', name: 'test.://xyz/foo/ssb11' , match: 0},
\]
for buf in bufnames
new `=buf.name`

View File

@@ -693,20 +693,51 @@ describe('path.c', function()
end)
describe('path_with_url', function()
itp('scheme is alpha and inner hyphen only', function()
itp('scheme is alpha and inner numeric, "+", "-", "." only', function()
local function path_with_url(fname)
return cimp.path_with_url(to_cstr(fname))
end
-- Check normal scheme with just alphabetic
eq(1, path_with_url([[test://xyz/foo/b0]]))
eq(2, path_with_url([[test:\\xyz\foo\b0]]))
eq(0, path_with_url([[test+abc://xyz/foo/b1]]))
-- Check valid scheme with just alphanumeric
eq(1, path_with_url([[test123://xyz/foo/b0]]))
eq(2, path_with_url([[test123:\\xyz\foo\b0]]))
-- Check invalid scheme (contains invalid character)
eq(0, path_with_url([[test_abc://xyz/foo/b2]]))
-- Check valid scheme containing '+', '-', or '.'
eq(1, path_with_url([[test+abc://xyz/foo/b1]]))
eq(2, path_with_url([[test+abc:\\xyz\foo\b1]]))
eq(1, path_with_url([[test-abc://xyz/foo/b3]]))
eq(2, path_with_url([[test-abc:\\xyz\foo\b3]]))
eq(1, path_with_url([[test.abc://xyz/foo/b1]]))
eq(2, path_with_url([[test.abc:\\xyz\foo\b1]]))
-- Check valid scheme with full suite of allowed characters
eq(1, path_with_url([[test+abc-123.ghi://xyz/foo/b1]]))
eq(2, path_with_url([[test+abc-123.ghi:\\xyz\foo\b1]]))
-- Check invalid scheme starting or ending with '+', '-', or '.'
eq(0, path_with_url([[-test://xyz/foo/b4]]))
eq(0, path_with_url([[test-://xyz/foo/b5]]))
eq(0, path_with_url([[+test://xyz/foo/b4]]))
eq(0, path_with_url([[test+://xyz/foo/b5]]))
eq(0, path_with_url([[.test://xyz/foo/b4]]))
eq(0, path_with_url([[test.://xyz/foo/b5]]))
-- Check additional valid scheme containing '+', '-', or '.'
eq(1, path_with_url([[test-C:/xyz/foo/b5]]))
eq(1, path_with_url([[test-custom:/xyz/foo/b5]]))
eq(1, path_with_url([[test+C:/xyz/foo/b5]]))
eq(1, path_with_url([[test+custom:/xyz/foo/b5]]))
eq(1, path_with_url([[test.C:/xyz/foo/b5]]))
eq(1, path_with_url([[test.custom:/xyz/foo/b5]]))
-- Check invalid scheme representing drive letter
eq(0, path_with_url([[c:/xyz/foo/b5]]))
eq(0, path_with_url([[C:/xyz/foo/b5]]))
end)