diff --git a/runtime/doc/lua.txt b/runtime/doc/lua.txt index 926295b226..26b591d077 100644 --- a/runtime/doc/lua.txt +++ b/runtime/doc/lua.txt @@ -3236,30 +3236,51 @@ vim.fs.root({source}, {marker}) *vim.fs.root()* ============================================================================== Lua module: vim.glob *vim.glob* +Glob-to-LPeg Converter (Peglob) This module converts glob patterns to LPeg +patterns according to the LSP 3.17 specification: +https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern + +Glob grammar overview: +• `*` to match zero or more characters in a path segment +• `?` to match on one character in a path segment +• `**` to match any number of path segments, including none +• `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript + files) +• `[]` to declare a range of characters to match in a path segment (e.g., + `example.[0-9]` to match on `example.0`, `example.1`, …) +• `[!...]` to negate a range of characters to match in a path segment (e.g., + `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`) + +Additional constraints: +• A Glob pattern must match an entire path, with partial matches considered + failures. +• The pattern only determines success or failure, without specifying which + parts correspond to which characters. +• A path segment is the portion of a path between two adjacent path separators + (`/`), or between the start/end of the path and the nearest separator. +• The `**` (globstar) pattern matches zero or more path segments, including + intervening separators (`/`). Within pattern strings, `**` must be delimited + by path separators (`/`) or pattern boundaries and cannot be adjacent to any + characters other than `/`. If `**` is not the final element, it must be + followed by `/`. +• `{}` (braced conditions) contains valid Glob patterns as branches, separated + by commas. 
Commas are exclusively used for separating branches and cannot + appear within a branch for any other purpose. Nested `{}` structures are + allowed, but `{}` must contain at least two branches—zero or one branch is + not permitted. +• In `[]` or `[!...]`, a character range consists of character intervals + (e.g., `a-z`) or individual characters (e.g., `w`). A range including `/` + won’t match that character. + + vim.glob.to_lpeg({pattern}) *vim.glob.to_lpeg()* Parses a raw glob into an |lua-lpeg| pattern. - This uses glob semantics from LSP 3.17.0: - https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern - - Glob patterns can have the following syntax: - • `*` to match one or more characters in a path segment - • `?` to match on one character in a path segment - • `**` to match any number of path segments, including none - • `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and - JavaScript files) - • `[]` to declare a range of characters to match in a path segment (e.g., - `example.[0-9]` to match on `example.0`, `example.1`, …) - • `[!...]` to negate a range of characters to match in a path segment - (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not - `example.0`) - Parameters: ~ • {pattern} (`string`) The raw glob pattern Return: ~ - (`vim.lpeg.Pattern`) pattern An |lua-lpeg| representation of the - pattern + (`vim.lpeg.Pattern`) An |lua-lpeg| representation of the pattern ============================================================================== diff --git a/runtime/doc/news.txt b/runtime/doc/news.txt index 3cda63dd58..17bf29f93c 100644 --- a/runtime/doc/news.txt +++ b/runtime/doc/news.txt @@ -175,7 +175,11 @@ OPTIONS PERFORMANCE -• todo +• |vim.glob.to_lpeg()| uses a new LPeg-based implementation (Peglob) that + provides ~50% speedup for complex patterns. 
The implementation restores + support for nested braces and follows the LSP 3.17 specification with + additional constraints for improved correctness and resistance to + backtracking edge cases. PLUGINS diff --git a/runtime/lua/vim/glob.lua b/runtime/lua/vim/glob.lua index 242c70d4b2..740cf7a7d1 100644 --- a/runtime/lua/vim/glob.lua +++ b/runtime/lua/vim/glob.lua @@ -1,93 +1,375 @@ -local lpeg = vim.lpeg -local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B -local C, Cc, Ct, Cf, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cmt - -local M = {} - -local pathsep = P('/') - ---- Parses a raw glob into an |lua-lpeg| pattern. +--- @brief Glob-to-LPeg Converter (Peglob) +--- This module converts glob patterns to LPeg patterns according to the LSP 3.17 specification: +--- https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern --- ---- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern ---- ---- Glob patterns can have the following syntax: ---- - `*` to match one or more characters in a path segment +--- Glob grammar overview: +--- - `*` to match zero or more characters in a path segment --- - `?` to match on one character in a path segment --- - `**` to match any number of path segments, including none --- - `{}` to group conditions (e.g. 
`*.{ts,js}` matches TypeScript and JavaScript files) ---- - `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …) ---- - `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`) +--- - `[]` to declare a range of characters to match in a path segment +--- (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …) +--- - `[!...]` to negate a range of characters to match in a path segment +--- (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`) +--- +--- Additional constraints: +--- - A Glob pattern must match an entire path, with partial matches +--- considered failures. +--- - The pattern only determines success or failure, without specifying +--- which parts correspond to which characters. +--- - A *path segment* is the portion of a path between two adjacent path +--- separators (`/`), or between the start/end of the path and the nearest +--- separator. +--- - The `**` (*globstar*) pattern matches zero or more path segments, +--- including intervening separators (`/`). Within pattern strings, `**` +--- must be delimited by path separators (`/`) or pattern boundaries and +--- cannot be adjacent to any characters other than `/`. If `**` is not +--- the final element, it must be followed by `/`. +--- - `{}` (*braced conditions*) contains valid Glob patterns as branches, +--- separated by commas. Commas are exclusively used for separating +--- branches and cannot appear within a branch for any other purpose. +--- Nested `{}` structures are allowed, but `{}` must contain at least two +--- branches—zero or one branch is not permitted. +--- - In `[]` or `[!...]`, a *character range* consists of character +--- intervals (e.g., `a-z`) or individual characters (e.g., `w`). A range +--- including `/` won’t match that character. 
+ +--- @diagnostic disable: missing-fields + +local m = vim.lpeg +local mt = getmetatable(m.P(0)) +local re = vim.re +local bit = require('bit') + +local M = {} + +-- Basic patterns for matching glob components +local letter = m.P(1) - m.S(',*?[]{}/\\') -- Any character except special glob characters +local slash = m.P '/' * m.Cc(m.P '/') -- Path separator with capture +local notslash = m.P(1) - m.P '/' -- Any character except path separator +local notcomma = m.P(1) - m.S(',\\') -- Any character except comma and backslash + +--- Handle EOF, considering whether we're in a segment or not +--- @type vim.lpeg.Pattern +local eof = -1 + * m.Cb('inseg') + / function(flag) + if flag then + return #m.P '/' + else + return m.P(-1) + end + end + +---@alias pat_table { F: string?, [1]: string, [2]: vim.lpeg.Pattern } +---@alias seg_part { [string]: any, [integer]: pat_table } + +--- @param p pat_table Initial segment pattern data +--- @return seg_part Segment structure with start pattern +local function start_seg(p) + return { s = p[2], e = true, n = 0 } +end + +--- @param t seg_part Segment structure +--- @param p pat_table Pattern to look for +--- @return table Updated segment structure +local function lookfor(t, p) + t.n = t.n + 1 + t[t.n] = p + return t +end + +--- @param t seg_part Segment structure +--- @return table Segment structure with end pattern +local function to_seg_end(t) + t.e = notslash ^ 0 + return t +end + +--- Constructs a segment matching pattern from collected components +--- +--- @param t seg_part Segment structure with patterns +--- @return vim.lpeg.Pattern Complete segment match pattern +local function end_seg(t) + --- @type table + local seg_grammar = { 's' } + if t.n > 0 then + seg_grammar.s = t.s + for i = 1, t.n do + local rname = t[i][1] + if not seg_grammar[rname] then + -- Optimize search when deterministic first character is available + if t[i].F then + seg_grammar[rname] = t[i][2] + notslash * (notslash - m.P(t[i].F)) ^ 0 * m.V(rname) + else + 
seg_grammar[rname] = t[i][2] + notslash * m.V(rname) + end + end + seg_grammar.s = seg_grammar.s * m.V(rname) + end + if t.e then + seg_grammar.s = seg_grammar.s * t.e + end + return m.P(seg_grammar) + else + seg_grammar.s = t.s + if t.e then + seg_grammar.s = seg_grammar.s * t.e + end + return seg_grammar.s + end +end + +--- @param p vim.lpeg.Pattern Pattern directly after `**/` +--- @return vim.lpeg.Pattern LPeg pattern for `**/p` +local function dseg(p) + return m.P { p + notslash ^ 0 * m.P '/' * m.V(1) } +end + +--- @type (vim.lpeg.Pattern|table) +local g = nil + +--- Multiplies conditions for braced expansion (Cartesian product) +--- +--- @param a string|string[] First part +--- @param b string|string[] Second part +--- @return string|string[] Cartesian product of values +local function mul_cond(a, b) + if type(a) == 'string' then + if type(b) == 'string' then + return a .. b + elseif type(b) == 'table' then + for i = 1, #b do + b[i] = a .. b[i] + end + return b + else + return a + end + elseif type(a) == 'table' then + if type(b) == 'string' then + for i = 1, #a do + a[i] = a[i] .. b + end + return a + elseif type(b) == 'table' then + --- @type string[] + local res = {} + local idx = 0 + for i = 1, #a do + for j = 1, #b do + idx = idx + 1 + res[idx] = a[i] .. 
b[j] + end + end + return res + else + return a + end + else + return b + end +end + +--- Combines alternatives in braced patterns +--- +--- @param a string|table First part +--- @param b string|table Second part +--- @return table #Combined alternatives +local function add_cond(a, b) + if type(a) == 'string' then + if type(b) == 'string' then + return { a, b } + elseif type(b) == 'table' then + table.insert(b, 1, a) + return b + end + elseif type(a) == 'table' then + if type(b) == 'string' then + table.insert(a, b) + return a + elseif type(b) == 'table' then + for i = 1, #b do + table.insert(a, b[i]) + end + return a + end + --- @diagnostic disable-next-line: missing-return + end +end + +--- Expands patterns handling segment boundaries +--- `#` prefix is added for sub-grammar to detect in-segment flag +--- +---@param a (any[]|vim.lpeg.Pattern[]) Array of patterns +---@param b string Tail string +---@param inseg boolean Whether inside a path segment +---@return vim.lpeg.Pattern #Expanded pattern +local function expand(a, b, inseg) + for i = 1, #a do + if inseg then + a[i] = '#' .. a[i] + end + a[i] = g:match(a[i] .. 
b) + end + local res = a[1] + for i = 2, #a do + res = res + a[i] + end + return res +end + +--- Converts a UTF-8 character to its Unicode codepoint +--- +--- @param utf8_str string UTF-8 character +--- @return number #Codepoint value +local function to_codepoint(utf8_str) + local codepoint = 0 + local byte_count = 0 + + for i = 1, #utf8_str do + local byte = utf8_str:byte(i) + + if byte_count ~= 0 then + codepoint = bit.bor(bit.lshift(codepoint, 6), bit.band(byte, 0x3F)) + byte_count = byte_count - 1 + else + if byte < 0x80 then + codepoint = byte + elseif byte < 0xE0 then + byte_count = 1 + codepoint = bit.band(byte, 0x1F) + elseif byte < 0xF0 then + byte_count = 2 + codepoint = bit.band(byte, 0x0F) + else + byte_count = 3 + codepoint = bit.band(byte, 0x07) + end + end + + if byte_count == 0 then + break + end + end + + return codepoint +end + +--- Pattern for matching UTF-8 characters +local cont = m.R('\128\191') +local any_utf8 = m.R('\0\127') + + m.R('\194\223') * cont + + m.R('\224\239') * cont * cont + + m.R('\240\244') * cont * cont * cont + +--- Creates a character class pattern for glob ranges +--- @param inv string Inversion flag ('!' or '') +--- @param ranges (string|string[])[] Character ranges +--- @return vim.lpeg.Pattern #Character class pattern +local function class(inv, ranges) + local patt = m.P(false) + if #ranges == 0 then + if inv == '!' then + return m.P '[!]' + else + return m.P '[]' + end + end + for _, v in ipairs(ranges) do + patt = patt + (type(v) == 'table' and m.utfR(to_codepoint(v[1]), to_codepoint(v[2])) or m.P(v)) + end + if inv == '!' then + patt = m.P(1) - patt --[[@as vim.lpeg.Pattern]] + end + return patt - m.P '/' +end + +-- Parse constraints for optimizing braced conditions +local noopt_condlist = re.compile [[ + s <- '/' / '**' / . 
[^/*]* s +]] + +local opt_tail = re.compile [[ + s <- (!'**' [^{/])* &'/' +]] + +-- stylua: ignore start +--- @nodoc +--- @diagnostic disable +--- Main grammar for glob pattern matching +g = { + 'Glob', + Glob = (m.P'#' * m.Cg(m.Cc(true), 'inseg') + m.Cg(m.Cc(false), 'inseg')) * + m.Cf(m.V'Element'^-1 * (slash * m.V'Element')^0 * (slash^-1 * eof), mt.__mul), + -- Elements handle segments, globstar patterns + Element = m.V'DSeg' + m.V'DSEnd' + m.Cf(m.V'Segment' * (slash * m.V'Segment')^0 * (slash * eof + eof^-1), mt.__mul), + -- Globstar patterns + DSeg = m.P'**/' * ((m.V'Element' + eof) / dseg), + DSEnd = m.P'**' * -1 * m.Cc(m.P(1)^0), + -- Segment handling with word and star patterns + Segment = (m.V'Word' / start_seg + m.Cc({ '', true }) / start_seg * (m.V'Star' * m.V'Word' % lookfor)) * + (m.V'Star' * m.V'Word' % lookfor)^0 * (m.V'Star' * m.V'CheckBnd' % to_seg_end)^-1 / end_seg + + m.V'Star' * m.V'CheckBnd' * m.Cc(notslash^0), + CheckBnd = #m.P'/' + -1, -- Boundary constraint + + -- Word patterns for fixed-length matching + Word = -m.P'*' * m.Ct( m.V('FIRST')^-1 * m.C(m.V'WordAux') ), + WordAux = m.V'Branch' + m.Cf(m.V'Simple'^1 * m.V'Branch'^-1, mt.__mul), + Simple = m.Cg( m.V'Token' * (m.V'Token' % mt.__mul)^0 * (m.V'Boundary' % mt.__mul)^-1), + Boundary = #m.P'/' * m.Cc(#m.P'/') + eof, + Token = m.V'Ques' + m.V'Class' + m.V'Escape' + m.V'Literal', + Star = m.P'*', + Ques = m.P'?' 
* m.Cc(notslash), + Escape = m.P'\\' * m.C(1) / m.P, + Literal = m.C(letter^1) / m.P, + + -- Branch handling for braced conditions + Branch = m.Cmt(m.C(m.V'CondList'), function(s, i, p1, p2) + -- Optimize brace expansion when possible + -- p1: string form of condition list, p2: transformed lua table + if noopt_condlist:match(p1) then + -- Cannot optimize, match till the end + return #s + 1, p2, s:sub(i) + end + -- Find point to cut for optimization + local cut = opt_tail:match(s, i) + if cut then + -- Can optimize: match till cut point + -- true flag tells expand to transform EOF matches to &'/' predicates + return cut, p2, s:sub(i, cut - 1), true + else + -- Cannot optimize + return #s + 1, p2, s:sub(i) + end + end) / expand, + -- Brace expansion handling + CondList = m.Cf(m.P'{' * m.V'Cond' * (m.P',' * m.V'Cond')^1 * m.P'}', add_cond), + Cond = m.Cf((m.C((notcomma + m.P'\\' * 1 - m.S'{}')^1) + m.V'CondList')^1, mul_cond) + m.C(true), + + -- Character class handling + Class = m.P'[' * m.C(m.P'!'^-1) * m.Ct( + (m.Ct(m.C(any_utf8) * m.P'-' * m.C(any_utf8 - m.P']')) + m.C(any_utf8 - m.P']'))^0 + ) * m.P']' / class, + + -- Deterministic first character extraction for optimization + FIRST = m.Cg(m.P(function(s, i) + if letter:match(s, i) then return true, s:sub(i, i) + else return false end + end), 'F') +} +-- stylua: ignore end +--- @diagnostic enable + +--- @nodoc +g = m.P(g) + +--- Parses a raw glob into an |lua-lpeg| pattern. --- ---@param pattern string The raw glob pattern ----@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern +---@return vim.lpeg.Pattern #An |lua-lpeg| representation of the pattern function M.to_lpeg(pattern) - local function class(inv, ranges) - local patt = R(unpack(vim.tbl_map(table.concat, ranges))) - if inv == '!' 
then - patt = P(1) - patt - end - return patt - end - - local function condlist(conds, after) - return vim.iter(conds):fold(P(false), function(acc, cond) - return acc + cond * after - end) - end - - local function mul(acc, m) - return acc * m - end - - local function star(stars, after) - return (-after * (P(1) - pathsep)) ^ #stars * after - end - - local function dstar(after) - return (-after * P(1)) ^ 0 * after - end - - -- luacheck: push ignore s - local function cut(_s, idx, match) - return idx, match - end - -- luacheck: pop - - --- @diagnostic disable-next-line: missing-fields - local p = P({ - 'Pattern', - Pattern = V('Elem') ^ -1 * V('End'), - Elem = Cmt( - Cf( - (V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal')) - * (V('Elem') + V('End')), - mul - ), - cut - ), - DStar = (B(pathsep) + -B(P(1))) - * P('**') - * (pathsep * (V('Elem') + V('End')) + V('End')) - / dstar, - Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star, - Ques = P('?') * Cc(P(1) - pathsep), - Class = P('[') - * C(P('!') ^ -1) - * Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']')) - / class, - CondList = P('{') * Ct(V('Cond') * (P(',') * V('Cond')) ^ 0) * P('}') * V('Pattern') / condlist, - -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same - -- wildcard semantics it usually has. - -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the - -- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {} - -- condition means "everything after the {}" where several other options separated by ',' may - -- exist in between that should not be matched by '*'. 
- Cond = Cmt(Cf((V('Ques') + V('Class') + V('Literal') - S(',}')) ^ 1, mul), cut) + Cc(P(0)), - Literal = P(1) / P, - End = P(-1) * Cc(P(-1)), - }) - - local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]] + local lpeg_pattern = g:match(pattern) --[[@as vim.lpeg.Pattern?]] assert(lpeg_pattern, 'Invalid glob') return lpeg_pattern end diff --git a/test/functional/lua/glob_spec.lua b/test/functional/lua/glob_spec.lua index 8302c7334d..3a105747fe 100644 --- a/test/functional/lua/glob_spec.lua +++ b/test/functional/lua/glob_spec.lua @@ -18,6 +18,7 @@ describe('glob', function() eq(true, match('', '')) eq(false, match('', 'a')) eq(true, match('a', 'a')) + eq(true, match('.', '.')) eq(true, match('/', '/')) eq(true, match('abc', 'abc')) eq(false, match('abc', 'abcdef')) @@ -35,7 +36,8 @@ describe('glob', function() end) it('should match * wildcards', function() - eq(false, match('*', '')) + eq(true, match('*', '')) + eq(true, match('*', ' ')) eq(true, match('*', 'a')) eq(false, match('*', '/')) eq(false, match('*', '/a')) @@ -43,6 +45,7 @@ describe('glob', function() eq(true, match('*', 'aaa')) eq(true, match('*a', 'aa')) eq(true, match('*a', 'abca')) + eq(true, match('*.ts', '.ts')) eq(true, match('*.txt', 'file.txt')) eq(false, match('*.txt', 'file.txtxt')) eq(false, match('*.txt', 'dir/file.txt')) @@ -62,18 +65,13 @@ describe('glob', function() eq(false, match('dir/*/file.txt', 'dir/file.txt')) eq(true, match('dir/*/file.txt', 'dir/subdir/file.txt')) eq(false, match('dir/*/file.txt', 'dir/subdir/subdir/file.txt')) - - -- The spec does not describe this, but VSCode only interprets ** when it's by - -- itself in a path segment, and otherwise interprets ** as consecutive * directives. 
- -- see: https://github.com/microsoft/vscode/blob/eef30e7165e19b33daa1e15e92fa34ff4a5df0d3/src/vs/base/common/glob.ts#L112 - eq(true, match('a**', 'abc')) -- '**' should parse as two '*'s when not by itself in a path segment - eq(true, match('**c', 'abc')) - eq(false, match('a**', 'ab')) -- each '*' should still represent at least one character - eq(false, match('**c', 'bc')) - eq(true, match('a**', 'abcd')) - eq(true, match('**d', 'abcd')) - eq(false, match('a**', 'abc/d')) - eq(false, match('**d', 'abc/d')) + eq(true, match('a*b*c*d*e*', 'axbxcxdxe')) + eq(true, match('a*b*c*d*e*', 'axbxcxdxexxx')) + eq(true, match('a*b?c*x', 'abxbbxdbxebxczzx')) + eq(false, match('a*b?c*x', 'abxbbxdbxebxczzy')) + eq(true, match('a*b*[cy]*d*e*', 'axbxcxdxexxx')) + eq(true, match('a*b*[cy]*d*e*', 'axbxyxdxexxx')) + eq(true, match('a*b*[cy]*d*e*', 'axbxxxyxdxexxx')) end) it('should match ? wildcards', function() @@ -84,6 +82,11 @@ describe('glob', function() eq(true, match('??', 'ab')) eq(true, match('a?c', 'abc')) eq(false, match('a?c', 'a/c')) + eq(false, match('a/', 'a/.b')) + eq(true, match('?/?', 'a/b')) + eq(true, match('/??', '/ab')) + eq(true, match('/?b', '/ab')) + eq(false, match('foo?bar', 'foo/bar')) end) it('should match ** wildcards', function() @@ -99,7 +102,7 @@ describe('glob', function() eq(true, match('/**', '/')) eq(true, match('/**', '/a/b/c')) eq(true, match('**/', '')) -- **/ absorbs trailing / - eq(true, match('**/', '/a/b/c')) + eq(false, match('**/', '/a/b/c')) eq(true, match('**/**', '')) eq(true, match('**/**', 'a')) eq(false, match('a/**', '')) @@ -134,20 +137,9 @@ describe('glob', function() end) it('should match {} groups', function() - eq(true, match('{}', '')) - eq(false, match('{}', 'a')) - eq(true, match('a{}', 'a')) - eq(true, match('{}a', 'a')) eq(true, match('{,}', '')) eq(true, match('{a,}', '')) eq(true, match('{a,}', 'a')) - eq(true, match('{a}', 'a')) - eq(false, match('{a}', 'aa')) - eq(false, match('{a}', 'ab')) - eq(true, match('{a?c}', 
'abc')) - eq(false, match('{ab}', 'a')) - eq(false, match('{ab}', 'b')) - eq(true, match('{ab}', 'ab')) eq(true, match('{a,b}', 'a')) eq(true, match('{a,b}', 'b')) eq(false, match('{a,b}', 'ab')) @@ -155,7 +147,22 @@ describe('glob', function() eq(false, match('{ab,cd}', 'a')) eq(true, match('{ab,cd}', 'cd')) eq(true, match('{a,b,c}', 'c')) - eq(false, match('{a,{b,c}}', 'c')) -- {} cannot nest + eq(true, match('{a,{b,c}}', 'c')) + eq(true, match('a{,/}*.txt', 'a.txt')) + eq(true, match('a{,/}*.txt', 'ab.txt')) + eq(true, match('a{,/}*.txt', 'a/b.txt')) + eq(true, match('a{,/}*.txt', 'a/ab.txt')) + eq(true, match('a/{a{a,b},b}', 'a/aa')) + eq(true, match('a/{a{a,b},b}', 'a/ab')) + eq(false, match('a/{a{a,b},b}', 'a/ac')) + eq(true, match('a/{a{a,b},b}', 'a/b')) + eq(false, match('a/{a{a,b},b}', 'a/c')) + eq(true, match('foo{bar,b*z}', 'foobar')) + eq(true, match('foo{bar,b*z}', 'foobuzz')) + eq(true, match('foo{bar,b*z}', 'foobarz')) + eq(true, match('{a,b}/c/{d,e}/**/*est.ts', 'a/c/d/one/two/three.test.ts')) + eq(true, match('{a,{d,e}b}/c', 'a/c')) + eq(true, match('{**/a,**/b}', 'b')) end) it('should match [] groups', function() @@ -181,6 +188,13 @@ describe('glob', function() eq(true, match('[a-zA-Z0-9]', 'Z')) eq(true, match('[a-zA-Z0-9]', '9')) eq(false, match('[a-zA-Z0-9]', '&')) + eq(true, match('[?]', '?')) + eq(false, match('[?]', 'a')) + eq(true, match('[*]', '*')) + eq(false, match('[*]', 'a')) + eq(true, match('[\\!]', '!')) + eq(true, match('a\\*b', 'a*b')) + eq(false, match('a\\*b', 'axb')) end) it('should match [!...] 
groups', function() @@ -202,8 +216,7 @@ describe('glob', function() it('should handle long patterns', function() - -- lpeg has a recursion limit of 200 by default, make sure the grammar does trigger it on + -- lpeg has a recursion limit of 200 by default, make sure the grammar does not trigger it on -- strings longer than that - local fill_200 = - 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + local fill_200 = ('a'):rep(200) eq(200, fill_200:len()) local long_lit = fill_200 .. 'a' eq(false, match(long_lit, 'b')) @@ -212,6 +225,21 @@ describe('glob', function() eq(true, match(long_pat, fill_200 .. 'a/b/c/d.c')) end) + -- New test for unicode patterns from assets + it('should match unicode patterns', function() + eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.ts')) + eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.tsx')) + eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.js')) + eq(true, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsx')) + eq(false, match('😎/¢£.{ts,tsx,js,jsx}', '😎/¢£.jsxxxxxxxx')) + eq(true, match('*é*', 'café noir')) + eq(true, match('caf*noir', 'café noir')) + eq(true, match('caf*noir', 'cafeenoir')) + eq(true, match('F[ë£a]', 'Fë')) + eq(true, match('F[ë£a]', 'F£')) + eq(true, match('F[ë£a]', 'Fa')) + end) + it('should match complex patterns', function() eq(false, match('**/*.{c,h}', '')) eq(false, match('**/*.{c,h}', 'c'))