refactor(lua): refactored glob

This commit is contained in:
altermo
2024-01-18 10:02:35 +01:00
committed by Lewis Russell
parent 0b36cbbafd
commit 9707363b09
2 changed files with 23 additions and 25 deletions

View File

@@ -1,7 +1,11 @@
local lpeg = vim.lpeg
local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
local M = {}
local pathsep = P('/')
--- Parses a raw glob into an |lua-lpeg| pattern.
---
--- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
@@ -17,18 +21,8 @@ local M = {}
---@param pattern string The raw glob pattern
---@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern
function M.to_lpeg(pattern)
local l = lpeg
local P, S, V = lpeg.P, lpeg.S, lpeg.V
local C, Cc, Ct, Cf = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf
local pathsep = '/'
local function class(inv, ranges)
for i, r in ipairs(ranges) do
ranges[i] = r[1] .. r[2]
end
local patt = l.R(unpack(ranges))
local patt = R(unpack(vim.tbl_map(table.concat, ranges)))
if inv == '!' then
patt = P(1) - patt
end
@@ -44,11 +38,11 @@ function M.to_lpeg(pattern)
end
--- Builds the pattern for a run of '*' wildcards followed by `after`.
---
--- `stars` is the captured run of '*' characters (only its length is used) and `after` is the
--- pattern for whatever follows the run (presumably an lpeg pattern produced by the grammar's
--- Elem/End rules -- confirm against the full grammar).
---
--- NOTE(review): the two `return` lines below look like the removed and added sides of the same
--- diff hunk (this listing carries no +/- markers). They build the same pattern; `l` is just an
--- alias for `lpeg` in the older form. Only one of them can be present in the real file.
local function star(stars, after)
-- LPeg `patt ^ n` means "n or more repetitions": consume at least #stars characters, none of
-- which may be the path separator, and never step over a position where `after` already
-- matches; then require `after` itself.
return (-after * (l.P(1) - pathsep)) ^ #stars * after
return (-after * (P(1) - pathsep)) ^ #stars * after
end
--- Builds the pattern for a '**' wildcard followed by `after`.
---
--- `after` is the pattern for whatever follows the '**' (presumably an lpeg pattern produced by
--- the grammar's Elem/End rules -- confirm against the full grammar).
---
--- NOTE(review): the two `return` lines below look like the removed and added sides of the same
--- diff hunk (this listing carries no +/- markers). They build the same pattern; `l` is just an
--- alias for `lpeg` in the older form. Only one of them can be present in the real file.
local function dstar(after)
-- Zero or more characters of any kind (nothing is subtracted from P(1), so the path separator
-- is allowed here), never stepping over a position where `after` already matches; then
-- require `after` itself.
return (-after * l.P(1)) ^ 0 * after
return (-after * P(1)) ^ 0 * after
end
local p = P({
@@ -59,11 +53,17 @@ function M.to_lpeg(pattern)
* (V('Elem') + V('End')),
mul
),
DStar = P('**') * (P(pathsep) * (V('Elem') + V('End')) + V('End')) / dstar,
DStar = (B(pathsep) + -B(P(1)))
* P('**')
* (pathsep * (V('Elem') + V('End')) + V('End'))
/ dstar,
Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
Ques = P('?') * Cc(l.P(1) - pathsep),
Class = P('[') * C(P('!') ^ -1) * Ct(Ct(C(1) * '-' * C(P(1) - ']')) ^ 1 * ']') / class,
CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * '}',
Ques = P('?') * Cc(P(1) - pathsep),
Class = P('[')
* C(P('!') ^ -1)
* Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
/ class,
CondList = P('{') * Cf(V('Cond') * (P(',') * V('Cond')) ^ 0, add) * P('}'),
-- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
-- wildcard semantics it usually has.
-- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
@@ -71,9 +71,9 @@ function M.to_lpeg(pattern)
-- condition means "everything after the {}" where several other options separated by ',' may
-- exist in between that should not be matched by '*'.
Cond = Cf((V('Ques') + V('Class') + V('CondList') + (V('Literal') - S(',}'))) ^ 1, mul)
+ Cc(l.P(0)),
Literal = P(1) / l.P,
End = P(-1) * Cc(l.P(-1)),
+ Cc(P(0)),
Literal = P(1) / P,
End = P(-1) * Cc(P(-1)),
})
local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]

View File

@@ -67,18 +67,16 @@ describe('glob', function()
eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt'))
eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt'))
-- TODO: The spec does not describe this, but VSCode only interprets ** when it's by
-- The spec does not describe this, but VSCode only interprets ** when it's by
-- itself in a path segment, and otherwise interprets ** as consecutive * directives.
-- The following tests show how this behavior should work, but is not yet fully implemented.
-- Currently, "a**" parses incorrectly as "a" "**" and "**a" parses correctly as "*" "*" "a".
-- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112
eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment
eq(true, match('**c', 'abc'))
-- eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character
eq(false, match('**c', 'bc'))
eq(true, match('a**', 'abcd'))
eq(true, match('**d', 'abcd'))
-- eq(false, match('a**', 'abc/d'))
eq(false, match('a**', 'abc/d'))
eq(false, match('**d', 'abc/d'))
end)