From c123b7245c438fea5dda4aa26aa06b4c161191de Mon Sep 17 00:00:00 2001 From: Chip Senkbeil Date: Sat, 15 Nov 2025 15:40:01 -0600 Subject: [PATCH] refactor(path)!: support RFC3986 in path_with_url() #36564 Problem: Nvim does not recognize URI scheme with numeric characters. While rare, there are URIs that contain numbers (e.g. [ed2k://](https://en.wikipedia.org/wiki/Ed2k_URI_scheme)) and characters like `+` (e.g. `svn+ssh`). I use it in [distant.nvim](https://github.com/chipsenkbeil/distant.nvim) to support multiple, distinct connections using `distant+1234://` as the scheme. Otherwise, if you open a file with the same name & path on two different machines from the same Nvim instance, their buffer names will conflict when just using `distant://`. Solution: Adds full support for detecting URI scheme per [RFC3986](https://www.rfc-editor.org/rfc/rfc3986#section-3.1) --- runtime/doc/news.txt | 2 ++ src/nvim/path.c | 12 ++++------- test/old/testdir/test_buffer.vim | 18 ++++++++++------ test/unit/path_spec.lua | 35 ++++++++++++++++++++++++++++++-- 4 files changed, 51 insertions(+), 16 deletions(-) diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt index 7b2a840505..3c790674e9 100644 --- a/runtime/doc/news.txt +++ b/runtime/doc/news.txt @@ -71,6 +71,8 @@ EDITOR - |i_CTRL-R| inserts named/clipboard registers (A-Z,a-z,0-9+) literally, like pasting instead of like user input. Improves performance, avoids broken formatting. To get the old behavior you can use `=@x`. +- Buffer names now follow RFC3986 for detecting a scheme, meaning + "svn+ssh", "ed2k", and "iris.xpc" are now treated as URI schemes EVENTS diff --git a/src/nvim/path.c b/src/nvim/path.c index 267c0d1fab..8efe8520fd 100644 --- a/src/nvim/path.c +++ b/src/nvim/path.c @@ -1749,10 +1749,6 @@ int path_with_url(const char *fname) { const char *p; - // We accept alphabetic characters and a dash in scheme part. - // RFC 3986 allows for more, but it increases the risk of matching - // non-URL text. 
- // first character must be alpha if (!ASCII_ISALPHA(*fname)) { return 0; @@ -1762,11 +1758,11 @@ int path_with_url(const char *fname) return 0; } - // check body: alpha or dash - for (p = fname + 1; (ASCII_ISALPHA(*p) || (*p == '-')); p++) {} + // check body: (alpha, digit, '+', '-', '.') following RFC3986 + for (p = fname + 1; (ASCII_ISALNUM(*p) || (*p == '+') || (*p == '-') || (*p == '.')); p++) {} - // check last char is not a dash - if (p[-1] == '-') { + // check last char is not '+', '-', or '.' + if ((p[-1] == '+') || (p[-1] == '-') || (p[-1] == '.')) { return 0; } diff --git a/test/old/testdir/test_buffer.vim b/test/old/testdir/test_buffer.vim index 1e4f19eb69..97ef48ef28 100644 --- a/test/old/testdir/test_buffer.vim +++ b/test/old/testdir/test_buffer.vim @@ -445,12 +445,18 @@ func Test_buffer_scheme() set noshellslash %bwipe! let bufnames = [ - \ #{id: 'ssb0', name: 'test://xyz/foo/ssb0' , match: 1}, - \ #{id: 'ssb1', name: 'test+abc://xyz/foo/ssb1', match: 0}, - \ #{id: 'ssb2', name: 'test_abc://xyz/foo/ssb2', match: 0}, - \ #{id: 'ssb3', name: 'test-abc://xyz/foo/ssb3', match: 1}, - \ #{id: 'ssb4', name: '-test://xyz/foo/ssb4' , match: 0}, - \ #{id: 'ssb5', name: 'test-://xyz/foo/ssb5' , match: 0}, + \ #{id: 'ssb0' , name: 'test://xyz/foo/ssb0' , match: 1}, + \ #{id: 'ssb1' , name: 'test1234://xyz/foo/ssb1', match: 1}, + \ #{id: 'ssb2' , name: 'test+abc://xyz/foo/ssb2', match: 1}, + \ #{id: 'ssb3' , name: 'test-abc://xyz/foo/ssb3', match: 1}, + \ #{id: 'ssb4' , name: 'test.abc://xyz/foo/ssb4', match: 1}, + \ #{id: 'ssb5' , name: 'test_abc://xyz/foo/ssb5', match: 0}, + \ #{id: 'ssb6' , name: '+test://xyz/foo/ssb6' , match: 0}, + \ #{id: 'ssb7' , name: 'test+://xyz/foo/ssb7' , match: 0}, + \ #{id: 'ssb8' , name: '-test://xyz/foo/ssb8' , match: 0}, + \ #{id: 'ssb9' , name: 'test-://xyz/foo/ssb9' , match: 0}, + \ #{id: 'ssb10', name: '.test://xyz/foo/ssb10' , match: 0}, + \ #{id: 'ssb11', name: 'test.://xyz/foo/ssb11' , match: 0}, \] for buf in bufnames new 
`=buf.name` diff --git a/test/unit/path_spec.lua b/test/unit/path_spec.lua index ffad552a8a..8aa2121f34 100644 --- a/test/unit/path_spec.lua +++ b/test/unit/path_spec.lua @@ -693,20 +693,51 @@ describe('path.c', function() end) describe('path_with_url', function() - itp('scheme is alpha and inner hyphen only', function() + itp('scheme is alpha and inner numeric, "+", "-", "." only', function() local function path_with_url(fname) return cimp.path_with_url(to_cstr(fname)) end + + -- Check normal scheme with just alphabetic eq(1, path_with_url([[test://xyz/foo/b0]])) eq(2, path_with_url([[test:\\xyz\foo\b0]])) - eq(0, path_with_url([[test+abc://xyz/foo/b1]])) + + -- Check valid scheme with just alphanumeric + eq(1, path_with_url([[test123://xyz/foo/b0]])) + eq(2, path_with_url([[test123:\\xyz\foo\b0]])) + + -- Check invalid scheme (contains invalid character) eq(0, path_with_url([[test_abc://xyz/foo/b2]])) + + -- Check valid scheme containing '+', '-', or '.' + eq(1, path_with_url([[test+abc://xyz/foo/b1]])) + eq(2, path_with_url([[test+abc:\\xyz\foo\b1]])) eq(1, path_with_url([[test-abc://xyz/foo/b3]])) eq(2, path_with_url([[test-abc:\\xyz\foo\b3]])) + eq(1, path_with_url([[test.abc://xyz/foo/b1]])) + eq(2, path_with_url([[test.abc:\\xyz\foo\b1]])) + + -- Check valid scheme with full suite of allowed characters + eq(1, path_with_url([[test+abc-123.ghi://xyz/foo/b1]])) + eq(2, path_with_url([[test+abc-123.ghi:\\xyz\foo\b1]])) + + -- Check invalid scheme starting or ending with '+', '-', or '.' eq(0, path_with_url([[-test://xyz/foo/b4]])) eq(0, path_with_url([[test-://xyz/foo/b5]])) + eq(0, path_with_url([[+test://xyz/foo/b4]])) + eq(0, path_with_url([[test+://xyz/foo/b5]])) + eq(0, path_with_url([[.test://xyz/foo/b4]])) + eq(0, path_with_url([[test.://xyz/foo/b5]])) + + -- Check additional valid scheme containing '+', '-', or '.' 
eq(1, path_with_url([[test-C:/xyz/foo/b5]])) eq(1, path_with_url([[test-custom:/xyz/foo/b5]])) + eq(1, path_with_url([[test+C:/xyz/foo/b5]])) + eq(1, path_with_url([[test+custom:/xyz/foo/b5]])) + eq(1, path_with_url([[test.C:/xyz/foo/b5]])) + eq(1, path_with_url([[test.custom:/xyz/foo/b5]])) + + -- Check invalid scheme representing drive letter eq(0, path_with_url([[c:/xyz/foo/b5]])) eq(0, path_with_url([[C:/xyz/foo/b5]])) end)