fix(treesitter): don't add final newline if not present #35970

**Problem(?):** Buffers that (for whatever reason) aren't meant to have
a final newline are still parsed with a final newline in `treesitter.c`.

**Solution:** Don't add the newline to the last buffer line if it
shouldn't be there. (This more closely matches the approach of
`read_buffer_into()`.)

This allows us to, say, use a scratch buffer with `noeol` and `nofixeol`
behind the scenes in `get_string_parser()`.

...which would allow us to track injection trees with extmarks in that
case.

...which would allow us to not drop previous trees after reparsing a
different range with `get_parser():parse()`.

...which would prevent flickering when editing a buffer that is displayed in
two or more windows at the same time.

...which would allow us to keep our sanity!!!

(one step at a time...)
This commit is contained in:
Riley Bruins
2025-10-01 17:31:52 -07:00
committed by GitHub
parent 89dc889c3d
commit b4016f4525
2 changed files with 64 additions and 6 deletions

View File

@@ -443,8 +443,9 @@ static const char *input_cb(void *payload, uint32_t byte_index, TSPoint position
*bytes_read = 0;
return "";
}
char *line = ml_get_buf(bp, (linenr_T)position.row + 1);
size_t len = (size_t)ml_get_buf_len(bp, (linenr_T)position.row + 1);
linenr_T lnum = (linenr_T)position.row + 1;
char *line = ml_get_buf(bp, lnum);
size_t len = (size_t)ml_get_buf_len(bp, lnum);
if (position.column > len) {
*bytes_read = 0;
return "";
@@ -456,10 +457,13 @@ static const char *input_cb(void *payload, uint32_t byte_index, TSPoint position
memchrsub(buf, '\n', NUL, tocopy);
*bytes_read = (uint32_t)tocopy;
if (tocopy < BUFSIZE) {
// now add the final \n. If it didn't fit, input_cb will be called again
// on the same line with advanced column.
buf[tocopy] = '\n';
(*bytes_read)++;
// now add the final \n, if it is meant to be present for this buffer. If it didn't fit,
// input_cb will be called again on the same line with advanced column.
if (lnum != bp->b_ml.ml_line_count || (!bp->b_p_bin && bp->b_p_fixeol)
|| (lnum != bp->b_no_eol_lnum && bp->b_p_eol)) {
buf[tocopy] = '\n';
(*bytes_read)++;
}
}
return buf;
#undef BUFSIZE

View File

@@ -91,6 +91,60 @@ describe('treesitter parser API', function()
eq(true, exec_lua('return parser:parse()[1] == tree2'))
end)
-- Checks that the parser honours the buffer's end-of-line options: with both 'eol' and
-- 'fixeol' turned off, the parsed range must stop at the end of the last line.
it('respects eol settings when parsing buffer', function()
insert([[
int main() {
int x = 3;
} // :D]])
exec_lua(function()
-- Turn both end-of-line options off before the parser is created and run.
vim.bo.eol = false
vim.bo.fixeol = false
-- Results are stored in _G so that the later exec_lua() calls can read them.
_G.parser = vim.treesitter.get_parser(0, 'c')
_G.tree = _G.parser:parse()[1]
_G.root = _G.tree:root()
_G.lang = vim.treesitter.language.inspect('c')
end)
eq(
'<node translation_unit>',
exec_lua(function()
return tostring(_G.root)
end)
)
-- range(true) also reports byte offsets. The expected end position is row 2, column 7,
-- i.e. directly after the 7-character last line `} // :D`, with no extra row for a
-- trailing newline.
eq(
{ 0, 0, 0, 2, 7, 33 },
exec_lua(function()
return { _G.root:range(true) }
end)
)
-- NOTE: Changing these settings marks the buffer as `modified` but does not fire `on_bytes`,
-- meaning this test case does not pass... is this intended?
-- The disabled block below turns the options back on and expects the range to grow by one
-- byte and end at row 3, column 0 (the final newline included).
-- exec_lua(function()
-- vim.bo.eol = true
-- vim.bo.fixeol = true
-- vim.cmd.update()
-- _G.parser = vim.treesitter.get_parser(0, 'c')
-- _G.tree = _G.parser:parse()[1]
-- _G.root = _G.tree:root()
-- _G.lang = vim.treesitter.language.inspect('c')
-- end)
--
-- eq(
-- '<node translation_unit>',
-- exec_lua(function()
-- return tostring(root)
-- end)
-- )
-- eq(
-- { 0, 0, 0, 3, 0, 34 },
-- exec_lua(function()
-- return { root:range(true) }
-- end)
-- )
end)
it('parses buffer asynchronously', function()
insert([[
int main() {