-- Converts Vim :help files to HTML.  Validates |tag| links and document syntax (parser errors).
--
-- NOTE: :helptags checks for duplicate tags, whereas this script checks _links_ (to tags).
--
-- USAGE (GENERATE HTML):
--   1. Run `make helptags` first; this script depends on vim.fn.taglist().
--   2. nvim -V1 -es --clean +"lua require('scripts.gen_help_html').gen('./build/runtime/doc/', 'target/dir/')"
--      - Read the docstring at gen().
--   3. cd target/dir/ && jekyll serve --host 0.0.0.0
--   4. Visit http://localhost:4000/…/help.txt.html
--
-- USAGE (VALIDATE):
--   1. nvim -V1 -es +"lua require('scripts.gen_help_html').validate()"
--      - validate() is 10x faster than gen(), so it is used in CI.
--
-- SELF-TEST MODE:
--   1. nvim -V1 -es +"lua require('scripts.gen_help_html')._test()"
--
-- NOTES:
--   * gen() and validate() are the primary entrypoints. validate() only exists because gen() is too
--     slow (~1 min) to run in per-commit CI.
--   * visit_node() is the core function used by gen() to traverse the document tree and produce HTML.
--   * visit_validate() is the core function used by validate().
--   * Files in `new_layout` will be generated with a "flow" layout instead of preformatted/fixed-width layout.

-- Module-level state shared by the functions below.
local tagmap = nil
local helpfiles = nil
-- Problems collected while validating, keyed by the offending tag / URL / word.
local invalid_links = {}
local invalid_urls = {}
local invalid_spelling = {}
-- Maps a discouraged spelling to the preferred one.
local spell_dict = {
  Neovim = 'Nvim',
  NeoVim = 'Nvim',
  neovim = 'Nvim',
  lua = 'Lua',
  VimL = 'Vimscript',
  vimL = 'Vimscript',
  viml = 'Vimscript',
}
local language = nil

local M = {}

-- These files are generated with "flow" layout (non fixed-width, wrapped text paragraphs).
-- All other files are "legacy" files which require fixed-width layout.
local new_layout = {
  ['api.txt'] = true,
  ['lsp.txt'] = true,
  ['channel.txt'] = true,
  ['deprecated.txt'] = true,
  ['develop.txt'] = true,
  ['lua.txt'] = true,
  ['luaref.txt'] = true,
  ['news.txt'] = true,
  ['nvim.txt'] = true,
  ['pi_health.txt'] = true,
  ['provider.txt'] = true,
  ['ui.txt'] = true,
}

-- TODO: These known invalid |links| require an update to the relevant docs.
local exclude_invalid = {
  ["'previewpopup'"] = "quickref.txt",
  ["'pvp'"] = "quickref.txt",
  ["'string'"] = "eval.txt",
  Query = 'treesitter.txt',
  ['eq?'] = 'treesitter.txt',
  matchit = 'vim_diff.txt',
  ['matchit.txt'] = 'help.txt',
  ["set!"] = "treesitter.txt",
  ['v:_null_blob'] = 'builtin.txt',
  ['v:_null_dict'] = 'builtin.txt',
  ['v:_null_list'] = 'builtin.txt',
  ['v:_null_string'] = 'builtin.txt',
}

-- False-positive "invalid URLs".
local exclude_invalid_urls = {
  ["http://"] = "usr_23.txt",
  ["http://."] = "usr_23.txt",
  ["http://aspell.net/man-html/Affix-Compression.html"] = "spell.txt",
  ["http://aspell.net/man-html/Phonetic-Code.html"] = "spell.txt",
  ["http://canna.sourceforge.jp/"] = "mbyte.txt",
  ["http://gnuada.sourceforge.net"] = "ft_ada.txt",
  ["http://lua-users.org/wiki/StringLibraryTutorial"] = "lua.txt",
  ["http://michael.toren.net/code/"] = "pi_tar.txt",
  ["http://papp.plan9.de"] = "syntax.txt",
  ["http://wiki.services.openoffice.org/wiki/Dictionaries"] = "spell.txt",
  ["http://www.adapower.com"] = "ft_ada.txt",
  ["http://www.jclark.com/"] = "quickfix.txt",
}

-- Deprecated, brain-damaged files that I don't care about.
local ignore_errors = {
  -- Keyed by helpfile basename; consulted by ignore_parse_error() and validate_url().
  ['pi_netrw.txt'] = true,
}

--- Writes `text` to the file `fname`, replacing any existing content.
---
--- @param fname string Path of the file to write.
--- @param text string Content to write.
--- Raises an error (naming the file and the OS reason) if the file cannot be opened.
local function tofile(fname, text)
  local f, err = io.open(fname, 'w')
  if not f then
    -- Report the file name; the handle is nil at this point and says nothing useful.
    error(('failed to write: %s (%s)'):format(fname, tostring(err)))
  end
  f:write(text)
  f:close()
end

--- Escapes the HTML-significant characters `&`, `<` and `>` in `s`.
---
--- "&" is replaced first so that the entities produced for "<" and ">" are not escaped again.
--- @returns The escaped string (a single value; gsub()'s match count is dropped).
local function html_esc(s)
  return (s:gsub('&', '&amp;'):gsub('<', '&lt;'):gsub('>', '&gt;'))
end

--- Percent-encodes `s` for use in a URL.
local function url_encode(s)
  -- Credit: tpope / vim-unimpaired
  -- NOTE: these chars intentionally *not* escaped: ' ( )
  return vim.fn.substitute(
    vim.fn.iconv(s, 'latin1', 'utf-8'),
    [=[[^A-Za-z0-9()'_.~-]]=],
    [=[\="%".printf("%02X",char2nr(submatch(0)))]=],
    'g'
  )
end

--- Replaces each tab in `s` with 8 spaces.
---
--- @returns The expanded string (a single value; gsub()'s match count is dropped).
local function expandtabs(s)
  return (s:gsub('\t', (' '):rep(8)))
end

--- Uppercases the first character of each space/tab-separated word of `s`.
---
--- Every word is emitted with one leading space, so a non-empty result starts with a space.
local function to_titlecase(s)
  local words = {}
  for w in vim.gsplit(s, '[ \t]+') do
    words[#words + 1] = (' %s%s'):format(vim.fn.toupper(w:sub(1, 1)), w:sub(2))
  end
  return table.concat(words)
end

--- Builds a heading anchor from heading text: lowercased, whitespace runs replaced by "-".
---
--- @returns The anchor name, or "unknown" when `text` is nil/false.
local function to_heading_tag(text)
  if not text then
    return 'unknown'
  end
  -- Prepend "_" to avoid conflicts with actual :help tags.
  return string.format('_%s', vim.fn.tolower((text:gsub('%s+', '-'))))
end

--- Gets the basename of `f` with ".txt" removed.
local function basename_noext(f)
  -- Parentheses keep gsub()'s match count from being passed on as a second argument.
  return vim.fs.basename((f:gsub('%.txt', '')))
end

--- Returns true if `s` is empty or consists only of tabs and spaces.
local function is_blank(s)
  -- Must be a quoted string: in a [[long string]] "\t" is a literal backslash followed by "t".
  return s:find('^[\t ]*$') ~= nil
end

--- Trims CR, tab, newline and space from `s`.
---
--- @param dir number|nil Passed as the "dir" argument of Vim's trim(); defaults to 0.
local function trim(s, dir)
  return vim.fn.trim(s, '\r\t\n ', dir or 0)
end

--- Removes common punctuation from URLs.
---
--- TODO: fix this in the parser instead... https://github.com/neovim/tree-sitter-vimdoc
---
--- @returns (fixed_url, removed_chars) where `removed_chars` is in the order found in the input.
local function fix_url(url)
  local removed_chars = ''
  local fixed_url = url
  -- Remove up to one of each char from end of the URL, in this order.
  for _, c in ipairs({ '.', ')' }) do
    if fixed_url:sub(-1) == c then
      removed_chars = c .. removed_chars
      fixed_url = fixed_url:sub(1, -2)
    end
  end
  return fixed_url, removed_chars
end

--- Checks if a given line is a "noise" line that doesn't look good in HTML form.
---
--- @param line string Line to check.
--- @param noise_lines table|nil List of noise lines seen so far; a detected noise line is
---                    appended to it. While the list is empty, any line counts as noise.
--- @returns true if `line` is noise, else false.
local function is_noise(line, noise_lines)
  local noise = (
    -- First line is always noise.
    (noise_lines ~= nil and next(noise_lines) == nil)
    or line:find('Type .*gO.* to see the table of contents')
    -- Title line of traditional :help pages.
    -- Example: "NVIM REFERENCE MANUAL by ..."
    -- (Quoted string, so that "\t" really is a tab inside the character class.)
    or line:find('^%s*N?VIM[ \t]*REFERENCE[ \t]*MANUAL')
    -- First line of traditional :help pages.
    -- Example: "*api.txt* Nvim"
    or line:find('%s*%*?[a-zA-Z]+%.txt%*?%s+N?[vV]im%s*$')
    -- modeline
    -- Example: "vim:tw=78:ts=8:sw=4:sts=4:et:ft=help:norl:"
    or line:find('^%s*vim?%:.*ft=help')
    or line:find('^%s*vim?%:.*filetype=help')
    or line:find('[*>]local%-additions[*<]')
  )
  if not noise then
    return false
  end
  if noise_lines then
    table.insert(noise_lines, line)
  end
  return true
end

--- Creates a github issue URL at neovim/tree-sitter-vimdoc with prefilled content.
---
--- @param fname string Source helpfile; only its basename is put in the issue title.
--- @param to_fname string Generated HTML file; its basename forms the neovim.io page URL.
--- @param sample_text string Context text, URL-encoded into the issue body.
local function get_bug_url_vimdoc(fname, to_fname, sample_text)
  local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
  return table.concat({
    'https://github.com/neovim/tree-sitter-vimdoc/issues/new?labels=bug&title=parse+error%3A+',
    vim.fs.basename(fname),
    '+&body=Found+%60tree-sitter-vimdoc%60+parse+error+at%3A+',
    this_url,
    '%0D%0DContext%3A%0D%0D%60%60%60%0D',
    url_encode(sample_text),
    '%0D%60%60%60',
  })
end

--- Creates a github issue URL at neovim/neovim with prefilled content.
---
--- @param fname string Source helpfile; only its basename is put in the issue title.
--- @param to_fname string Generated HTML file; its basename forms the neovim.io page URL.
--- @param sample_text string Context text, URL-encoded into the issue body.
--- @param token_name string|nil Name of the unhandled token, mentioned in the body if given.
local function get_bug_url_nvim(fname, to_fname, sample_text, token_name)
  local this_url = string.format('https://neovim.io/doc/user/%s', vim.fs.basename(to_fname))
  local token_note = token_name and ('+unhandled+token%3A+%60' .. token_name .. '%60') or ''
  return table.concat({
    'https://github.com/neovim/neovim/issues/new?labels=bug&title=user+docs+HTML%3A+',
    vim.fs.basename(fname),
    '+&body=%60gen_help_html.lua%60+problem+at%3A+',
    this_url,
    '%0D',
    token_note,
    '%0DContext%3A%0D%0D%60%60%60%0D',
    url_encode(sample_text),
    '%0D%60%60%60',
  })
end

--- Gets a "foo.html" name from a "foo.txt" helpfile name.
--- @param f string|nil Helpfile name, e.g. "api.txt".
--- @returns The page name, e.g. "api.html", or nil when `f` is nil.
local function get_helppage(f)
  if not f then
    return nil
  end
  -- Special case: help.txt is the "main landing page" of :help files, not index.txt.
  local special = {
    ['index.txt'] = 'vimindex.html',
    ['help.txt'] = 'index.html',
  }
  if special[f] then
    return special[f]
  end
  -- Parentheses drop gsub()'s second return value (the substitution count).
  return (f:gsub('%.txt$', '.html'))
end

--- Counts leading spaces (tab=8) to decide the indent size of multiline text.
---
--- Blank lines (empty or whitespace-only) are ignored.
---
--- @param s string Newline-delimited text.
--- @returns The smallest indent among the non-blank lines, or 0 if there are none.
local function get_indent(s)
  local min_indent = nil
  for line in vim.gsplit(s, '\n') do
    if not is_blank(line) then
      local width = expandtabs(line:match('^%s+') or ''):len()
      if not min_indent or width < min_indent then
        min_indent = width
      end
    end
  end
  return min_indent or 0
end

--- Removes the common indent level, after expanding tabs to 8 spaces.
---
--- @param s string Newline-delimited text.
--- @returns The text with tabs expanded and the common indent cut from every line.
local function trim_indent(s)
  local indent_size = get_indent(s)
  -- Collect the lines and join once instead of growing a string inside the loop.
  local lines = {}
  for line in vim.gsplit(s, '\n') do
    lines[#lines + 1] = expandtabs(line):sub(indent_size + 1)
  end
  return table.concat(lines, '\n')
end

--- Gets raw buffer text in the node's range (+/- an offset), as a newline-delimited string.
---
--- @param node table Node providing `range()` (0-based start/end rows).
--- @param bufnr number Buffer to read from.
--- @param offset number Extra lines of context to include before and after the node.
local function getbuflinestr(node, bufnr, offset)
  local line1, _, line2, _ = node:range()
  -- Clamp to the first buffer line, so that an offset larger than the node's row still
  -- yields the available context rather than a start line below 1.
  line1 = math.max(line1 - offset, 0)
  line2 = line2 + offset
  local lines = vim.fn.getbufline(bufnr, line1 + 1, line2 + 1)
  return table.concat(lines, '\n')
end

--- Gets the whitespace just before `node` from the raw buffer text.
--- Needed for preformatted `old` lines.
--- @param node table Node providing `range()` (0-based start row and column).
--- @param bufnr number Buffer to read from.
--- @returns The run of whitespace that ends at the node's start column, or "" if there is none.
local function getws(node, bufnr)
  local line1, c1 = node:range()
  -- Only the line on which the node starts is needed.
  local raw = vim.fn.getbufline(bufnr, line1 + 1)[1]
  if not raw then
    -- No such line (e.g. buffer not loaded): nothing precedes the node.
    return ''
  end
  return raw:sub(1, c1):match('%s+$') or ''
end

--- Gets the help page and tag name that a link node refers to.
---
--- @param node table Node holding the link text; the text of an option link (the node or its
---             parent has type "optionlink") is wrapped in single quotes to form the tag.
--- @param bufnr number Buffer containing the node.
--- @returns (helppage, tag) where `helppage` is nil if the tag is not present in `tagmap`.
local function get_tagname(node, bufnr)
  local text = vim.treesitter.get_node_text(node, bufnr)
  local parent = node:parent()
  local is_option = node:type() == 'optionlink'
    or (parent ~= nil and parent:type() == 'optionlink')
  local tag = is_option and ("'%s'"):format(text) or text
  -- Unknown tags have no entry in tagmap; do not hand nil to vim.fs.basename().
  local target = tagmap[tag]
  local helpfile = target and vim.fs.basename(target) or nil -- "api.txt"
  local helppage = get_helppage(helpfile) -- "api.html"
  return helppage, tag
end

--- Returns true if the given invalid tagname is a false positive.
local function ignore_invalid(s)
  if exclude_invalid[s] then
    return true
  end
  -- Strings like |~/====| appear in various places and the parser thinks they are links, but they
  -- are just table borders.
  return s:find('===') ~= nil or s:find('%-%-%-') ~= nil
end

--- Returns true if a parse error in file `fname`, with error text `s`, should not be reported.
local function ignore_parse_error(fname, s)
  if ignore_errors[vim.fs.basename(fname)] then
    return true
  end
  -- Ignore parse errors for unclosed tag.
  -- This is common in vimdocs and is treated as plaintext by :help.
  return s:find("^[`'|*]") ~= nil
end

--- Returns true if `node` has an ancestor of type `ancestor_name`.
---
--- The search walks up from the node's parent and stops at the "help_file" node, which
--- itself is never reported as a match.
local function has_ancestor(node, ancestor_name)
  local p = node:parent()
  while p and p:type() ~= 'help_file' do
    if p:type() == ancestor_name then
      return true
    end
    p = p:parent()
  end
  return false
end

--- Gets the first matching child node matching `name`.
local function first(node, name) for c, _ in node:iter_children() do if c:named() and c:type() == name then return c end end return nil end local function validate_link(node, bufnr, fname) local helppage, tagname = get_tagname(node:child(1), bufnr) local ignored = false if not tagmap[tagname] then ignored = has_ancestor(node, 'column_heading') or node:has_error() or ignore_invalid(tagname) if not ignored then invalid_links[tagname] = vim.fs.basename(fname) end end return helppage, tagname, ignored end --- TODO: port the logic from scripts/check_urls.vim local function validate_url(text, fname) local ignored = false if ignore_errors[vim.fs.basename(fname)] then ignored = true elseif text:find('http%:') and not exclude_invalid_urls[text] then invalid_urls[text] = vim.fs.basename(fname) end return ignored end --- Traverses the tree at `root` and checks that |tag| links point to valid helptags. local function visit_validate(root, level, lang_tree, opt, stats) level = level or 0 local node_name = (root.named and root:named()) and root:type() or nil -- Parent kind (string). local parent = root:parent() and root:parent():type() or nil local toplevel = level < 1 local function node_text(node) return vim.treesitter.get_node_text(node or root, opt.buf) end local text = trim(node_text()) if root:child_count() > 0 then for node, _ in root:iter_children() do if node:named() then visit_validate(node, level + 1, lang_tree, opt, stats) end end end if node_name == 'ERROR' then if ignore_parse_error(opt.fname, text) then return end -- Store the raw text to give context to the error report. local sample_text = not toplevel and getbuflinestr(root, opt.buf, 0) or '[top level!]' -- Flatten the sample text to a single, truncated line. 
sample_text = vim.trim(sample_text):gsub('[\t\n]', ' '):sub(1, 80) table.insert(stats.parse_errors, sample_text) elseif (node_name == 'word' or node_name == 'uppercase_name') and (not vim.tbl_contains({'codespan', 'taglink', 'tag'}, parent)) then local text_nopunct = vim.fn.trim(text, '.,', 0) -- Ignore some punctuation. if spell_dict[text_nopunct] then invalid_spelling[text_nopunct] = invalid_spelling[text_nopunct] or {} invalid_spelling[text_nopunct][vim.fs.basename(opt.fname)] = node_text(root:parent()) end elseif node_name == 'url' then local fixed_url, _ = fix_url(trim(text)) validate_url(fixed_url, opt.fname) elseif node_name == 'taglink' or node_name == 'optionlink' then local _, _, _ = validate_link(root, opt.buf, opt.fname) end end -- Fix tab alignment issues caused by concealed characters like |, `, * in tags -- and code blocks. local function fix_tab_after_conceal(text, next_node_text) -- Vim tabs take into account the two concealed characters even though they -- are invisible, so we need to add back in the two spaces if this is -- followed by a tab to make the tab alignment to match Vim's behavior. if string.sub(next_node_text,1,1) == '\t' then text = text .. ' ' end return text end -- Generates HTML from node `root` recursively. local function visit_node(root, level, lang_tree, headings, opt, stats) level = level or 0 local node_name = (root.named and root:named()) and root:type() or nil -- Previous sibling kind (string). local prev = root:prev_sibling() and (root:prev_sibling().named and root:prev_sibling():named()) and root:prev_sibling():type() or nil -- Next sibling kind (string). local next_ = root:next_sibling() and (root:next_sibling().named and root:next_sibling():named()) and root:next_sibling():type() or nil -- Parent kind (string). local parent = root:parent() and root:parent():type() or nil local text = '' local trimmed -- Gets leading whitespace of `node`. 
local function ws(node) node = node or root local ws_ = getws(node, opt.buf) -- XXX: first node of a (line) includes whitespace, even after -- https://github.com/neovim/tree-sitter-vimdoc/pull/31 ? if ws_ == '' then ws_ = vim.treesitter.get_node_text(node, opt.buf):match('^%s+') or '' end return ws_ end local function node_text(node, ws_) node = node or root ws_ = (ws_ == nil or ws_ == true) and getws(node, opt.buf) or '' return string.format('%s%s', ws_, vim.treesitter.get_node_text(node, opt.buf)) end if root:named_child_count() == 0 or node_name == 'ERROR' then text = node_text() trimmed = html_esc(trim(text)) text = html_esc(text) else -- Process children and join them with whitespace. for node, _ in root:iter_children() do if node:named() then local r = visit_node(node, level + 1, lang_tree, headings, opt, stats) text = string.format('%s%s', text, r) end end trimmed = trim(text) end if node_name == 'help_file' then -- root node return text elseif node_name == 'url' then local fixed_url, removed_chars = fix_url(trimmed) return ('%s%s%s'):format(ws(), fixed_url, fixed_url, removed_chars) elseif node_name == 'word' or node_name == 'uppercase_name' then return text elseif node_name == 'h1' or node_name == 'h2' or node_name == 'h3' then if is_noise(text, stats.noise_lines) then return '' -- Discard common "noise" lines. end -- Remove "===" and tags from ToC text. local hname = (node_text():gsub('%-%-%-%-+', ''):gsub('%=%=%=%=+', ''):gsub('%*.*%*', '')) -- Use the first *tag* node as the heading anchor, if any. local tagnode = first(root, 'tag') -- Use the *tag* as the heading anchor id, if possible. 
local tagname = tagnode and url_encode(node_text(tagnode:child(1), false)) or to_heading_tag(hname) if node_name == 'h1' or #headings == 0 then table.insert(headings, { name = hname, subheadings = {}, tag = tagname }) else table.insert(headings[#headings].subheadings, { name = hname, subheadings = {}, tag = tagname }) end local el = node_name == 'h1' and 'h2' or 'h3' return ('<%s id="%s" class="help-heading">%s%s>\n'):format(el, tagname, text, el) elseif node_name == 'column_heading' or node_name == 'column_name' then if root:has_error() then return text end return ('
%s'):format(ws(), trimmed)
    if opt.old and node_name == 'codespan' then
      s = fix_tab_after_conceal(s, node_text(root:next_sibling()))
    end
    return s
  elseif node_name == 'argument' then
    return ('%s{%s}'):format(ws(), text)
  elseif node_name == 'codeblock' then
    return text
  elseif node_name == 'language' then
    language = node_text(root)
    return ''
  elseif node_name == 'code' then  -- Highlighted codeblock (child).
    if is_blank(text) then
      return ''
    end
    local code
    if language then
      code = ('%s%s'):format(trim(trim_indent(text), 2)) end return code elseif node_name == 'tag' then -- anchor if root:has_error() then return text end local in_heading = vim.list_contains({'h1', 'h2', 'h3'}, parent) local cssclass = (not in_heading and get_indent(node_text()) > 8) and 'help-tag-right' or 'help-tag' local tagname = node_text(root:child(1), false) if vim.tbl_count(stats.first_tags) < 2 then -- Force the first 2 tags in the doc to be anchored at the main heading. table.insert(stats.first_tags, tagname) return '' end local el = in_heading and 'span' or 'code' local encoded_tagname = url_encode(tagname) local s = ('%s<%s id="%s" class="%s">%s%s>'):format(ws(), el, encoded_tagname, cssclass, encoded_tagname, trimmed, el) if opt.old then s = fix_tab_after_conceal(s, node_text(root:next_sibling())) end if in_heading and prev ~= 'tag' then -- Don't set "id", let the heading use the tag as its "id" (used by search engines). s = ('%s<%s class="%s">%s%s>'):format(ws(), el, cssclass, encoded_tagname, trimmed, el) -- Start the container for tags in a heading. -- This makes "justify-content:space-between" right-align the tags. --