refactor(treesitter): use scratch buffer for string parser #35988

This commit changes `languagetree.lua` so that it creates a scratch
buffer under the hood when it is given a string source. This makes it
much easier to use extmarks whenever we need to track injection trees
in `languagetree.lua`. It also allows us to remove the `treesitter.c`
code for parsing a string directly.

Note that the string parser's scratch buffer has the `eol` and `fixeol`
options disabled (`noeol nofixeol`) so that the parsed source exactly
matches the passed-in string.
This commit is contained in:
Riley Bruins
2025-10-02 15:33:18 -07:00
committed by GitHub
parent 8eb0964537
commit ac15b384a6
5 changed files with 66 additions and 62 deletions

View File

@@ -1468,10 +1468,10 @@ LanguageTree:register_cbs({cbs}, {recursive})
callbacks. callbacks.
LanguageTree:source() *LanguageTree:source()* LanguageTree:source() *LanguageTree:source()*
Returns the source content of the language tree (bufnr or string). Returns the source bufnr of the language tree.
Return: ~ Return: ~
(`integer|string`) (`integer`)
*LanguageTree:tree_for_range()* *LanguageTree:tree_for_range()*
LanguageTree:tree_for_range({range}, {opts}) LanguageTree:tree_for_range({range}, {opts})

View File

@@ -5,7 +5,7 @@ error('Cannot require a meta file')
---@alias TSLoggerCallback fun(logtype: 'parse'|'lex', msg: string) ---@alias TSLoggerCallback fun(logtype: 'parse'|'lex', msg: string)
---@class TSParser: userdata ---@class TSParser: userdata
---@field parse fun(self: TSParser, tree: TSTree?, source: integer|string, include_bytes: boolean, timeout_ns: integer?): TSTree?, (Range4|Range6)[] ---@field parse fun(self: TSParser, tree: TSTree?, source: integer, include_bytes: boolean, timeout_ns: integer?): TSTree?, (Range4|Range6)[]
---@field reset fun(self: TSParser) ---@field reset fun(self: TSParser)
---@field included_ranges fun(self: TSParser, include_bytes: boolean?): integer[] ---@field included_ranges fun(self: TSParser, include_bytes: boolean?): integer[]
---@field set_included_ranges fun(self: TSParser, ranges: (Range6|TSNode)[]) ---@field set_included_ranges fun(self: TSParser, ranges: (Range6|TSNode)[])

View File

@@ -99,7 +99,8 @@ local TSCallbackNames = {
---taken from _trees. This is mostly a short-lived cache for included_regions() ---taken from _trees. This is mostly a short-lived cache for included_regions()
---@field private _lang string Language name ---@field private _lang string Language name
---@field private _parent? vim.treesitter.LanguageTree Parent LanguageTree ---@field private _parent? vim.treesitter.LanguageTree Parent LanguageTree
---@field private _source (integer|string) Buffer or string to parse ---@field private _source integer Buffer to parse
---@field private _has_scratch_buf boolean Whether _source is a |scratch-buffer| for string parsing.
---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each language). ---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each language).
---Each key is the index of region, which is synced with _regions and _valid. ---Each key is the index of region, which is synced with _regions and _valid.
---@field private _valid_regions table<integer,true> Set of valid region IDs. ---@field private _valid_regions table<integer,true> Set of valid region IDs.
@@ -134,11 +135,26 @@ function LanguageTree.new(source, lang, opts)
source = vim.api.nvim_get_current_buf() source = vim.api.nvim_get_current_buf()
end end
local has_scratch_buf = false
if type(source) == 'string' then
local new_source = vim.api.nvim_create_buf(false, true)
if new_source == 0 then
error('Unable to create buffer for string parser')
end
vim.bo[new_source].fixeol = false
vim.bo[new_source].eol = false
vim.api.nvim_buf_set_lines(new_source, 0, -1, false, vim.split(source, '\n', { plain = true }))
source = new_source
has_scratch_buf = true
end
local injections = opts.injections or {} local injections = opts.injections or {}
--- @class vim.treesitter.LanguageTree --- @class vim.treesitter.LanguageTree
local self = { local self = {
_source = source, _source = source,
_has_scratch_buf = has_scratch_buf,
_lang = lang, _lang = lang,
_children = {}, _children = {},
_trees = {}, _trees = {},
@@ -174,8 +190,7 @@ end
--- @private --- @private
function LanguageTree:_set_logger() function LanguageTree:_set_logger()
local source = self:source() local source = tostring(self:source())
source = type(source) == 'string' and 'text' or tostring(source)
local lang = self:lang() local lang = self:lang()
@@ -365,8 +380,8 @@ function LanguageTree:children()
return self._children return self._children
end end
--- Returns the source content of the language tree (bufnr or string). --- Returns the source bufnr of the language tree.
--- @return integer|string --- @return integer
function LanguageTree:source() function LanguageTree:source()
return self._source return self._source
end end
@@ -515,9 +530,8 @@ function LanguageTree:_async_parse(range, on_parse)
end end
local source = self._source local source = self._source
local is_buffer_parser = type(source) == 'number' local buf = vim.b[source]
local buf = is_buffer_parser and vim.b[source] or nil local ct = buf.changedtick
local ct = is_buffer_parser and buf.changedtick or nil
local total_parse_time = 0 local total_parse_time = 0
local redrawtime = vim.o.redrawtime * 1000000 local redrawtime = vim.o.redrawtime * 1000000
@@ -527,10 +541,7 @@ function LanguageTree:_async_parse(range, on_parse)
local parse = coroutine.wrap(self._parse) local parse = coroutine.wrap(self._parse)
local function step() local function step()
if is_buffer_parser then if not vim.api.nvim_buf_is_valid(source) then
if
not vim.api.nvim_buf_is_valid(source --[[@as number]])
then
return nil return nil
end end
@@ -540,7 +551,6 @@ function LanguageTree:_async_parse(range, on_parse)
total_parse_time = 0 total_parse_time = 0
parse = coroutine.wrap(self._parse) parse = coroutine.wrap(self._parse)
end end
end
thread_state.timeout = not vim.g._ts_force_sync_parsing and default_parse_timeout_ns or nil thread_state.timeout = not vim.g._ts_force_sync_parsing and default_parse_timeout_ns or nil
local parse_time, trees, finished = tcall(parse, self, range, thread_state) local parse_time, trees, finished = tcall(parse, self, range, thread_state)
@@ -725,6 +735,10 @@ end
--- `remove_child` must be called on the parent to remove it. --- `remove_child` must be called on the parent to remove it.
function LanguageTree:destroy() function LanguageTree:destroy()
-- Cleanup here -- Cleanup here
if self._has_scratch_buf then
self._has_scratch_buf = false
vim.api.nvim_buf_delete(self._source, {})
end
for _, child in pairs(self._children) do for _, child in pairs(self._children) do
child:destroy() child:destroy()
end end
@@ -842,7 +856,7 @@ function LanguageTree:included_regions()
end end
---@param node TSNode ---@param node TSNode
---@param source string|integer ---@param source integer
---@param metadata vim.treesitter.query.TSMetadata ---@param metadata vim.treesitter.query.TSMetadata
---@param include_children boolean ---@param include_children boolean
---@return Range6[] ---@return Range6[]

View File

@@ -513,24 +513,12 @@ static int parser_parse(lua_State *L)
old_tree = ud ? ud->tree : NULL; old_tree = ud ? ud->tree : NULL;
} }
TSTree *new_tree = NULL; if (lua_type(L, 3) != LUA_TNUMBER) {
size_t len; return luaL_argerror(L, 3, "expected buffer handle");
const char *str; }
handle_T bufnr;
buf_T *buf;
TSInput input;
// This switch is necessary because of the behavior of lua_isstring, that handle_T bufnr = (handle_T)lua_tointeger(L, 3);
// consider numbers as strings... buf_T *buf = handle_get_buffer(bufnr);
switch (lua_type(L, 3)) {
case LUA_TSTRING:
str = lua_tolstring(L, 3, &len);
new_tree = ts_parser_parse_string(p, old_tree, str, (uint32_t)len);
break;
case LUA_TNUMBER:
bufnr = (handle_T)lua_tointeger(L, 3);
buf = handle_get_buffer(bufnr);
if (!buf) { if (!buf) {
#define BUFSIZE 256 #define BUFSIZE 256
@@ -540,7 +528,9 @@ static int parser_parse(lua_State *L)
#undef BUFSIZE #undef BUFSIZE
} }
input = (TSInput){ (void *)buf, input_cb, TSInputEncodingUTF8, NULL }; TSInput input = (TSInput){ (void *)buf, input_cb, TSInputEncodingUTF8, NULL };
TSTree *new_tree = NULL;
if (!lua_isnil(L, 5)) { if (!lua_isnil(L, 5)) {
uint64_t timeout_ns = (uint64_t)lua_tointeger(L, 5); uint64_t timeout_ns = (uint64_t)lua_tointeger(L, 5);
TSLuaParserCallbackPayload payload = TSLuaParserCallbackPayload payload =
@@ -553,12 +543,6 @@ static int parser_parse(lua_State *L)
new_tree = ts_parser_parse(p, old_tree, input); new_tree = ts_parser_parse(p, old_tree, input);
} }
break;
default:
return luaL_argerror(L, 3, "expected either string or buffer handle");
}
bool include_bytes = (lua_gettop(L) >= 4) && lua_toboolean(L, 4); bool include_bytes = (lua_gettop(L) >= 4) && lua_toboolean(L, 4);
if (!new_tree) { if (!new_tree) {

View File

@@ -1264,6 +1264,12 @@ print()
parser:for_each_tree(function(tstree, tree) parser:for_each_tree(function(tstree, tree)
ranges[tree:lang()] = { tstree:root():range(true) } ranges[tree:lang()] = { tstree:root():range(true) }
end) end)
-- Scratch buffer should get cleaned up
assert(vim.api.nvim_buf_is_loaded(parser:source()))
parser:destroy()
assert(not vim.api.nvim_buf_is_loaded(parser:source()))
return ranges return ranges
end) end)