treesitter: refactor and use lua regexes

This commit is contained in:
Thomas Vigouroux
2020-07-14 21:50:57 +02:00
parent 7f49594813
commit 613068071e
5 changed files with 257 additions and 175 deletions

View File

@@ -1,4 +1,6 @@
local a = vim.api
local query = require'vim.treesitter.query'
local language = require'vim.treesitter.language'
-- TODO(bfredl): currently we retain parsers for the lifetime of the buffer.
-- Consider using weak references to release the parser if all plugins are done with
@@ -44,42 +46,30 @@ function Parser:set_included_ranges(ranges)
self.valid = false
end
-- Public module table: the union of the query and language sub-modules.
-- The "error" behavior makes vim.tbl_extend raise if both sub-modules define
-- the same key, so a name clash is caught at load time.
local M = vim.tbl_extend("error", query, language)

-- Lazily load the highlighter the first time it is indexed, and cache it on
-- the module table so that __index is not consulted again for that key.
setmetatable(M, {
  __index = function (t, k)
    if k == "TSHighlighter" then
      -- Legacy name: still served, but report the deprecation to the user.
      a.nvim_err_writeln("vim.TSHighlighter is deprecated, please use vim.treesitter.highlighter")
      t[k] = require'vim.treesitter.highlighter'
      return t[k]
    elseif k == "highlighter" then
      t[k] = require'vim.treesitter.highlighter'
      return t[k]
    end
    -- Any other missing key falls through and yields nil.
  end
})
--- Makes sure the parser for `lang` is registered.
--
-- @param lang The language name
-- @param path Optional path of the parser library. When nil, the first result
--             of nvim_get_runtime_file for `parser/<lang>.*` is used.
--
-- @returns true when the language was already available, nothing otherwise.
--          Raises an error when no parser file can be found.
function M.require_language(lang, path)
  if vim._ts_has_language(lang) then
    return true
  end

  local parser_path = path
  if parser_path == nil then
    local candidates = a.nvim_get_runtime_file('parser/' .. lang .. '.*', false)
    if #candidates == 0 then
      -- TODO(bfredl): help tag?
      error("no parser for '"..lang.."' language")
    end
    parser_path = candidates[1]
  end

  vim._ts_add_language(parser_path, lang)
end
--- Returns the result of vim._ts_inspect_language for `lang`.
--
-- The parser is loaded first through M.require_language, which raises an
-- error when no parser for `lang` can be found.
--
-- @param lang The language name
function M.inspect_language(lang)
M.require_language(lang)
return vim._ts_inspect_language(lang)
end
function M.create_parser(bufnr, lang, id)
M.require_language(lang)
--- Creates a new parser.
--
-- It is not recommended to use this, use vim.treesitter.get_parser() instead.
--
-- @param bufnr The buffer the parser will be tied to
-- @param lang The language of the parser.
-- @param id The id the parser will have
function M._create_parser(bufnr, lang, id)
language.require_language(lang)
if bufnr == 0 then
bufnr = a.nvim_get_current_buf()
end
@@ -108,17 +98,31 @@ function M.create_parser(bufnr, lang, id)
return self
end
function M.get_parser(bufnr, ft, buf_attach_cbs)
--- Gets the parser for this bufnr / lang combination.
--
-- If needed, this will create the parser.
-- Unconditionally attaches the provided callbacks.
--
-- @param bufnr The buffer the parser should be tied to (nil or 0 for the current buffer)
-- @param lang The language of this parser (defaults to the buffer's 'filetype')
-- @param buf_attach_cbs An `nvim_buf_attach`-like table argument with the following keys:
--  `on_lines` : see `nvim_buf_attach`, but this will be called _after_ the parser's callback.
--  `on_changedtree` : a callback that will be called every time the tree has syntactical changes.
--      It will only be passed one argument, which is a table of the ranges (as node ranges) that
--      changed.
--
-- @returns The parser
function M.get_parser(bufnr, lang, buf_attach_cbs)
if bufnr == nil or bufnr == 0 then
bufnr = a.nvim_get_current_buf()
end
if ft == nil then
ft = a.nvim_buf_get_option(bufnr, "filetype")
if lang == nil then
lang = a.nvim_buf_get_option(bufnr, "filetype")
end
local id = tostring(bufnr)..'_'..ft
local id = tostring(bufnr)..'_'..lang
if parsers[id] == nil then
parsers[id] = M.create_parser(bufnr, ft, id)
parsers[id] = M._create_parser(bufnr, lang, id)
end
if buf_attach_cbs and buf_attach_cbs.on_changedtree then
@@ -132,129 +136,4 @@ function M.get_parser(bufnr, ft, buf_attach_cbs)
return parsers[id]
end
-- query: pattern matching on trees
-- predicate matching is implemented in lua
local Query = {}
Query.__index = Query
-- Two-character prefixes that already select a regex "magic" mode.
local magic_prefixes = {
  ['\\v'] = true,
  ['\\m'] = true,
  ['\\M'] = true,
  ['\\V'] = true,
}

-- Prepends `\v` to `str` unless it is shorter than two characters or already
-- starts with one of the prefixes in `magic_prefixes`.
local function check_magic(str)
  local long_enough = string.len(str) >= 2
  if long_enough and not magic_prefixes[string.sub(str, 1, 2)] then
    return '\\v' .. str
  end
  return str
end
--- Parses a query and precompiles the regexes of its `match?` predicates.
--
-- @param lang The language of the query
-- @param query A string containing the query (s-expr syntax)
--
-- @returns A Query object with the fields `query`, `info`, `captures` and
--          `regexes` (pattern id -> { predicate index -> compiled vim.regex })
function M.parse_query(lang, query)
  M.require_language(lang)

  local raw_query = vim._ts_parse_query(lang, vim.fn.escape(query,'\\'))
  local info = raw_query:inspect()
  local parsed = setmetatable({
    query = raw_query,
    info = info,
    captures = info.captures,
    regexes = {},
  }, Query)

  for pattern_id, predicates in pairs(info.patterns) do
    local compiled = {}
    for idx, predicate in ipairs(predicates) do
      -- Only (#match? @capture "literal") predicates can be precompiled.
      local is_literal_match = predicate[1] == "match?"
        and type(predicate[2]) == "number"
        and type(predicate[3]) == "string"
      if is_literal_match then
        compiled[idx] = vim.regex(check_magic(predicate[3]))
      end
    end
    -- Patterns without any compiled regex get no entry at all.
    if next(compiled) ~= nil then
      parsed.regexes[pattern_id] = compiled
    end
  end

  return parsed
end
-- Returns the buffer text covered by `node`, or nil when the node spans more
-- than one row.
local function get_node_text(node, bufnr)
  local row, first_col, last_row, last_col = node:range()
  if row == last_row then
    local lines = a.nvim_buf_get_lines(bufnr, row, row + 1, true)
    -- node columns are 0-based; string.sub is 1-based with an inclusive end
    return string.sub(lines[1], first_col + 1, last_col)
  end
  return nil
end
--- Checks whether `match` satisfies the predicates of `pattern`.
--
-- @param match Table indexed by capture id (pred[2] / pred[3]) yielding nodes
-- @param pattern Key into self.info.patterns and self.regexes
-- @param bufnr The buffer the node text is read from
--
-- @returns false as soon as one known predicate fails, true otherwise
function Query:match_preds(match, pattern, bufnr)
local preds = self.info.patterns[pattern]
if not preds then
-- no predicates registered for this pattern: nothing can reject the match
return true
end
-- regexes precompiled for this pattern, indexed like `preds` (may be nil)
local regexes = self.regexes[pattern]
for i, pred in pairs(preds) do
-- Here we only want to return if a predicate DOES NOT match, and
-- continue on the other case. This way unknown predicates will not be considered,
-- which allows some testing and easier user extensibility (#12173).
-- Also, tree-sitter strips the leading # from predicates for us.
if pred[1] == "eq?" then
local node = match[pred[2]]
local node_text = get_node_text(node, bufnr)
local str
if type(pred[3]) == "string" then
-- (#eq? @aa "foo")
str = pred[3]
else
-- (#eq? @aa @bb)
str = get_node_text(match[pred[3]], bufnr)
end
-- get_node_text returns nil for multi-line nodes, which never compare equal here
if node_text ~= str or str == nil then
return false
end
elseif pred[1] == "match?" then
-- a match? predicate without a regex at index i is rejected
if not regexes or not regexes[i] then
return false
end
local node = match[pred[2]]
local start_row, start_col, end_row, end_col = node:range()
-- only single-line nodes are matched
if start_row ~= end_row then
return false
end
if not regexes[i]:match_line(bufnr, start_row, start_col, end_col) then
return false
end
end
end
return true
end
--- Iterates over the captures of this query inside `node`.
--
-- @param node The node the query is run on
-- @param bufnr The buffer the node text is read from (0 for the current buffer)
-- @param start Forwarded to node:_rawquery
-- @param stop Forwarded to node:_rawquery
--
-- @returns An iterator yielding (capture, captured_node); captures whose match
--          fails its predicates are skipped.
function Query:iter_captures(node, bufnr, start, stop)
  if bufnr == 0 then
    bufnr = vim.api.nvim_get_current_buf()
  end
  local next_raw = node:_rawquery(self.query,true,start,stop)

  return function()
    while true do
      local capture, captured_node, match = next_raw()
      if match == nil then
        -- no match table supplied: pass the values through unchanged
        return capture, captured_node
      end
      local is_active = self:match_preds(match, match.pattern, bufnr)
      match.active = is_active
      if is_active then
        return capture, captured_node
      end
      -- predicates rejected this match: try the next one
    end
  end
end
--- Iterates over the matches of this query inside `node`.
--
-- @param node The node the query is run on
-- @param bufnr The buffer the node text is read from (0 for the current buffer)
-- @param start Forwarded to node:_rawquery
-- @param stop Forwarded to node:_rawquery
--
-- @returns An iterator yielding (pattern, match); matches that fail their
--          predicates are skipped.
function Query:iter_matches(node, bufnr, start, stop)
  if bufnr == 0 then
    bufnr = vim.api.nvim_get_current_buf()
  end
  local next_raw = node:_rawquery(self.query,false,start,stop)

  return function()
    while true do
      local pattern, match = next_raw()
      if match == nil or self:match_preds(match, pattern, bufnr) then
        return pattern, match
      end
      -- predicates rejected this match: try the next one
    end
  end
end
return M

View File

@@ -11,16 +11,49 @@ local ts_hs_ns = a.nvim_create_namespace("treesitter_hl")
-- go through a few tree-sitter provided queries and decide
-- on translations that makes the most sense.
-- Maps query capture names to the highlight group used for them.
-- Every key appears exactly once; the values of the previously duplicated
-- keys (keyword, string, type, comment, constant, operator, number, label)
-- are unchanged.
TSHighlighter.hl_map = {
  ["error"] = "Error",

  -- Miscs
  ["comment"] = "Comment",
  ["punctuation.delimiter"] = "Delimiter",
  ["punctuation.bracket"] = "Delimiter",
  ["punctuation.special"] = "Delimiter",

  -- Constants
  ["constant"] = "Constant",
  ["constant.builtin"] = "Special",
  ["constant.macro"] = "Define",
  ["string"] = "String",
  ["string.regex"] = "String",
  ["string.escape"] = "SpecialChar",
  ["character"] = "Character",
  ["number"] = "Number",
  ["boolean"] = "Boolean",
  ["float"] = "Float",

  -- Functions
  ["function"] = "Function",
  ["function.special"] = "Function",
  ["function.builtin"] = "Special",
  ["function.macro"] = "Macro",
  ["parameter"] = "Identifier",
  ["method"] = "Function",
  ["field"] = "Identifier",
  ["property"] = "Identifier",
  ["constructor"] = "Special",

  -- Keywords
  ["conditional"] = "Conditional",
  ["repeat"] = "Repeat",
  ["label"] = "Label",
  ["operator"] = "Operator",
  ["keyword"] = "Keyword",
  ["exception"] = "Exception",

  ["type"] = "Type",
  ["type.builtin"] = "Type",
  ["structure"] = "Structure",
  ["include"] = "Include",
}
function TSHighlighter.new(query, bufnr, ft)
@@ -75,7 +108,18 @@ end
function TSHighlighter:set_query(query)
-- Resolve `query` into a parsed query object:
--  * a string is parsed for the parser's language;
--  * nil falls back to the 'highlights' query of that language, and to an
--    empty query (after reporting the problem) when none is found.
if type(query) == "string" then
  query = vim.treesitter.parse_query(self.parser.lang, query)
elseif query == nil then
  query = vim.treesitter.get_query(self.parser.lang, 'highlights')

  if query == nil then
    -- vim.api exposes nvim_err_writeln; there is no `err_writeln` function.
    a.nvim_err_writeln("No highlights.scm query found for " .. self.parser.lang)
    query = vim.treesitter.parse_query(self.parser.lang, "")
  end
end
self.query = query
self.hl_cache = setmetatable({}, {

View File

@@ -0,0 +1,26 @@
local a = vim.api
local M = {}
--- Makes sure the parser for `lang` is loaded.
--
-- @param lang The language name
-- @param path Optional path of the parser library. When nil, the first result
--             of nvim_get_runtime_file for `parser/<lang>.*` is used.
--
-- @returns true once the language is available. Raises an error when no
--          parser file can be found for `lang`.
function M.require_language(lang, path)
  if vim._ts_has_language(lang) then
    return true
  end
  if path == nil then
    local fname = 'parser/' .. lang .. '.*'
    local paths = a.nvim_get_runtime_file(fname, false)
    if #paths == 0 then
      error("no parser for '"..lang.."' language, see :help treesitter-parsers")
    end
    path = paths[1]
  end
  vim._ts_add_language(path, lang)
  -- Report success the same way as the "already loaded" path above.
  return true
end
--- Returns the result of vim._ts_inspect_language for `lang`.
--
-- The parser is loaded first through M.require_language, which raises an
-- error when no parser for `lang` can be found.
--
-- @param lang The language name
function M.inspect_language(lang)
M.require_language(lang)
return vim._ts_inspect_language(lang)
end
return M

View File

@@ -0,0 +1,133 @@
local a = vim.api
local language = require'vim.treesitter.language'
-- query: pattern matching on trees
-- predicate matching is implemented in lua
local Query = {}
Query.__index = Query
local M = {}
--- Parses a query.
--
-- The parser for `lang` is loaded first if necessary.
--
-- @param lang The language of the query
-- @param query A string containing the query (s-expr syntax)
--
-- @returns A Query object with the fields `query` (the raw parsed query),
--          `info` (the result of its inspect()) and `captures` (info.captures)
function M.parse_query(lang, query)
  language.require_language(lang)

  local raw_query = vim._ts_parse_query(lang, vim.fn.escape(query,'\\'))
  local info = raw_query:inspect()

  return setmetatable({
    query = raw_query,
    info = info,
    captures = info.captures,
  }, Query)
end
-- Returns the buffer text covered by `node`, or nil when the node spans more
-- than one row.
-- TODO(vigoux): support multiline nodes too
local function get_node_text(node, bufnr)
  local row, first_col, last_row, last_col = node:range()
  if row == last_row then
    local lines = a.nvim_buf_get_lines(bufnr, row, row + 1, true)
    -- node columns are 0-based; string.sub is 1-based with an inclusive end
    return string.sub(lines[1], first_col + 1, last_col)
  end
  return nil
end
-- Predicate handlers, keyed by predicate name (tree-sitter strips the leading
-- `#` for us). Each handler is called as
--   handler(match, pattern, bufnr, predicate)
-- where `predicate` is the list { name, arg1, arg2, ... } and capture
-- arguments are ids indexing into `match`. A handler returns true when the
-- predicate holds and false otherwise.
local predicate_handlers = {
  ["eq?"] = function(match, _, bufnr, predicate)
    local node_text = get_node_text(match[predicate[2]], bufnr)

    local expected = predicate[3]
    if type(expected) ~= "string" then
      -- (#eq? @aa @bb): compare against the text of the second capture
      expected = get_node_text(match[expected], bufnr)
    end
    -- otherwise (#eq? @aa "foo"): compare against the literal

    -- get_node_text yields nil for multi-line nodes; those never compare equal
    return expected ~= nil and node_text == expected
  end,

  ["match?"] = function(match, _, bufnr, predicate)
    -- get_node_text already returns nil for multi-line nodes, so no separate
    -- range check is needed here.
    local text = get_node_text(match[predicate[2]], bufnr)
    if text == nil then
      return false
    end

    -- predicate[3] is a Lua pattern; collapse string.find's multiple return
    -- values into a plain boolean.
    return string.find(text, predicate[3]) ~= nil
  end,
}
--- Registers a handler for the predicate `name`.
--
-- Replacing an already registered handler is allowed, but a warning is
-- written with nvim_err_writeln when it happens.
--
-- @param name The predicate name, without the leading `#` (e.g. "eq?")
-- @param handler A function called as handler(match, pattern, bufnr, predicate);
--                a falsy return value rejects the match
function M.add_predicate(name, handler)
  if predicate_handlers[name] then
    a.nvim_err_writeln("It is recommended to not overwrite predicates.")
  end
  predicate_handlers[name] = handler
end
--- Checks whether `match` satisfies every known predicate of `pattern`.
--
-- @param match The match table handed to the predicate handlers
-- @param pattern Key into self.info.patterns
-- @param bufnr The buffer handed to the predicate handlers
--
-- @returns false as soon as one registered handler rejects the match, true otherwise
function Query:match_preds(match, pattern, bufnr)
  local predicates = self.info.patterns[pattern]

  if predicates then
    for _, predicate in pairs(predicates) do
      -- Only a predicate that DOES NOT match ends the loop. Predicates without
      -- a registered handler are ignored, which allows some testing and easier
      -- user extensibility (#12173).
      -- Also, tree-sitter strips the leading # from predicates for us.
      local handler = predicate_handlers[predicate[1]]
      if handler and not handler(match, pattern, bufnr, predicate) then
        return false
      end
    end
  end

  return true
end
--- Iterates over the captures of this query inside `node`.
--
-- @param node The node the query is run on
-- @param bufnr The buffer the node text is read from (nil or 0 for the current buffer)
-- @param start Forwarded to node:_rawquery
-- @param stop Forwarded to node:_rawquery
--
-- @returns An iterator yielding (capture, captured_node); captures whose match
--          fails its predicates are skipped.
function Query:iter_captures(node, bufnr, start, stop)
  -- Accept nil as well as 0 for "current buffer", like get_parser does.
  if bufnr == nil or bufnr == 0 then
    bufnr = vim.api.nvim_get_current_buf()
  end
  local raw_iter = node:_rawquery(self.query, true, start, stop)

  return function()
    while true do
      local capture, captured_node, match = raw_iter()
      if match == nil then
        -- no match table supplied: pass the values through unchanged
        return capture, captured_node
      end
      local active = self:match_preds(match, match.pattern, bufnr)
      match.active = active
      if active then
        return capture, captured_node
      end
      -- predicates rejected this match: try the next one
    end
  end
end
--- Iterates over the matches of this query inside `node`.
--
-- @param node The node the query is run on
-- @param bufnr The buffer the node text is read from (nil or 0 for the current buffer)
-- @param start Forwarded to node:_rawquery
-- @param stop Forwarded to node:_rawquery
--
-- @returns An iterator yielding (pattern, match); matches that fail their
--          predicates are skipped.
function Query:iter_matches(node, bufnr, start, stop)
  -- Accept nil as well as 0 for "current buffer", like get_parser does.
  if bufnr == nil or bufnr == 0 then
    bufnr = vim.api.nvim_get_current_buf()
  end
  local raw_iter = node:_rawquery(self.query, false, start, stop)

  return function()
    while true do
      local pattern, match = raw_iter()
      if match == nil or self:match_preds(match, pattern, bufnr) then
        return pattern, match
      end
      -- predicates rejected this match: try the next one
    end
  end
end
return M

View File

@@ -15,14 +15,14 @@ before_each(clear)
describe('treesitter API', function()
-- error tests not requiring a parser library
it('handles missing language', function()
  -- A missing parser is reported from language.lua and points at the help tag.
  eq("Error executing lua: .../language.lua: no parser for 'borklang' language, see :help treesitter-parsers",
     pcall_err(exec_lua, "parser = vim.treesitter.get_parser(0, 'borklang')"))

  -- actual message depends on platform
  matches("Error executing lua: Failed to load parser: uv_dlopen: .+",
     pcall_err(exec_lua, "parser = vim.treesitter.require_language('borklang', 'borkbork.so')"))

  eq("Error executing lua: .../language.lua: no parser for 'borklang' language, see :help treesitter-parsers",
     pcall_err(exec_lua, "parser = vim.treesitter.inspect_language('borklang')"))
end)
@@ -246,7 +246,7 @@ static int nlua_schedule(lua_State *const lstate)
; defaults to very magic syntax, for best compatibility
((identifier) @Identifier (#match? @Identifier "^l(u)a_"))
; still support \M etc prefixes
((identifier) @Constant (#match? @Constant "\M^\[A-Z_]\+$"))
((identifier) @Constant (#match? @Constant "^[A-Z_]+$"))
((binary_expression left: (identifier) @WarningMsg.left right: (identifier) @WarningMsg.right) (#eq? @WarningMsg.left @WarningMsg.right))
@@ -292,9 +292,9 @@ static int nlua_schedule(lua_State *const lstate)
]]}
exec_lua([[
local TSHighlighter = vim.treesitter.TSHighlighter
local highlighter = vim.treesitter.highlighter
local query = ...
test_hl = TSHighlighter.new(query, 0, "c")
test_hl = highlighter.new(query, 0, "c")
]], hl_query)
screen:expect{grid=[[
{2:/// Schedule Lua callback on main loop's event queue} |