From a468bc573d094612e8d9d5b8adb97dd7ada3ccc1 Mon Sep 17 00:00:00 2001 From: Yochem van Rosmalen Date: Tue, 18 Nov 2025 21:35:22 +0100 Subject: [PATCH] ci: check URL reachability #35593 Problem: scripts/check_urls.vim manually matches urls in the help pages and then synchronously checks them via curl/wget/powershell. This is extremely slow (~5 minutes for Nvims runtime on my machine) and prone to errors in how the urls are matched. Solution: - Use Tree-sitter to find the urls in the help pages and `vim.net.request` to check the responses. - Add a `lintdocurls` build task and check it in CI (every Friday). - Reopens a dedicated issue if it finds unreachable URLs. - Drop the old check_urls.vim script. --- .github/workflows/lintdocurls.yml | 39 ++++++++++++++ Makefile | 2 +- scripts/check_urls.vim | 89 ------------------------------- scripts/lintdoc.lua | 2 +- scripts/vim-patch.sh | 6 +-- src/gen/gen_help_html.lua | 57 ++++++++++++++------ src/nvim/CMakeLists.txt | 6 +++ 7 files changed, 90 insertions(+), 111 deletions(-) create mode 100644 .github/workflows/lintdocurls.yml delete mode 100644 scripts/check_urls.vim diff --git a/.github/workflows/lintdocurls.yml b/.github/workflows/lintdocurls.yml new file mode 100644 index 0000000000..a7d3d542e7 --- /dev/null +++ b/.github/workflows/lintdocurls.yml @@ -0,0 +1,39 @@ +name: lintdoc-urls +on: + schedule: + - cron: '22 22 * * 5' + workflow_dispatch: + +jobs: + check-unreachable-urls: + runs-on: ubuntu-latest + permissions: + issues: write + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v5 + with: + fetch-depth: 0 + + - name: Set up git config + run: | + git config --global user.name 'marvim' + git config --global user.email 'marvim@users.noreply.github.com' + + - uses: ./.github/actions/setup + + - name: Check for unreachable URLs + id: unreachable-urls + env: + run_url: https://github.com/neovim/neovim/actions/runs/${{ github.run_id }} + run: | + OUT_FILE=$(mktemp) + make lintdocurls 2>&1 | sed -n '/invalid URLs/,/^}/p' > $OUT_FILE + if [ -n $OUT_FILE -a -s $OUT_FILE ]; then + # wrap output in a code block + sed -i '1i```' -e '$a```' $OUT_FILE + echo "Automatically generated on $(date) from $run_url" >> $OUT_FILE + gh issue reopen 36597 + gh issue edit 36597 --body-file $OUT_FILE + fi diff --git a/Makefile b/Makefile index 851b0a3203..e0dbe962e7 100644 --- a/Makefile +++ b/Makefile @@ -136,7 +136,7 @@ functionaltest-lua: | nvim $(CMAKE) --build build --target functionaltest FORMAT=formatc formatlua format -LINT=lintlua lintsh lintc clang-analyzer lintcommit lintdoc lint luals +LINT=lintlua lintsh lintc clang-analyzer lintcommit lintdoc lintdocurls lint luals TEST=functionaltest unittest generated-sources benchmark $(FORMAT) $(LINT) $(TEST) doc: | build/.ran-cmake $(CMAKE) --build build --target $@ diff --git a/scripts/check_urls.vim b/scripts/check_urls.vim deleted file mode 100644 index b75dc29c48..0000000000 --- a/scripts/check_urls.vim +++ /dev/null @@ -1,89 +0,0 @@ -" Test for URLs in help documents. -" -" Opens a new window with all found URLS followed by return code from curl -" (anything other than 0 means unreachable) -" -" Written by Christian Brabandt. - -func Test_check_URLs() -"20.10.23, added by Restorer - if has("win32") - let s:outdev = 'nul' - else - let s:outdev = '/dev/null' - endif -" Restorer: For Windows users. If "curl" or "wget" is installed on the system -" but not in %PATH%, add the full path to them to %PATH% environment variable. - if executable('curl') - " Note: does not follow redirects! - let s:command1 = 'curl --silent --max-time 5 --fail --output ' ..s:outdev.. ' --head ' - let s:command2 = "" - elseif executable('wget') - " Note: only allow a couple of redirects - let s:command1 = 'wget --quiet -S --spider --max-redirect=2 --timeout=5 --tries=2 -O ' ..s:outdev.. ' ' - let s:command2 = "" - elseif has("win32") "20.10.23, added by Restorer - if executable('powershell') - if 2 == system('powershell -nologo -noprofile "$psversiontable.psversion.major"') - echoerr 'To work in OS Windows requires the program "PowerShell" version 3.0 or higher' - return - endif - let s:command1 = - \ "powershell -nologo -noprofile \"{[Net.ServicePointManager]::SecurityProtocol = 'Tls12, Tls11, Tls, Ssl3'};try{(Invoke-WebRequest -MaximumRedirection 2 -TimeoutSec 5 -Uri " - let s:command2 = ').StatusCode}catch{exit [int]$Error[0].Exception.Status}"' - endif - else - echoerr 'Only works when "curl" or "wget", or "powershell" is available' - return - endif - - " Do the testing. - set report =999 - set nomore shm +=s - - let pat='\(https\?\|ftp\)://[^\t* ]\+' - exe 'helpgrep' pat - helpclose - - let urls = map(getqflist(), 'v:val.text') - " do not use submatch(1)! - let urls = map(urls, {key, val -> matchstr(val, pat)}) - " remove examples like user@host (invalid urls) - let urls = filter(urls, 'v:val !~ "@"') - " Remove example URLs which are invalid - let urls = filter(urls, {key, val -> val !~ '\<\(\(my\|some\)\?host\|machine\|hostname\|file\)\>'}) - new - put =urls - " remove some more invalid items - " empty lines - "20.10.23, Restorer: '_' is a little faster, see `:h global` - v/./d _ - " remove # anchors - %s/#.*$//e - " remove trailing stuff (parenthesis, dot, comma, quotes), but only for HTTP - " links - g/^h/s#[.),'"`/>][:.,]\?$## - g#^[hf]t\?tp:/\(/\?\.*\)$#d _ - silent! g/ftp://,$/d _ - silent! g/=$/d _ - let a = getline(1,'$') - let a = uniq(sort(a)) - %d _ - call setline(1, a) - - %s/.*/\=TestURL(submatch(0))/ - - " highlight the failures - /.* \([0-9]*[1-9]\|[0-9]\{2,}\)$ -endfunc - -func TestURL(url) - " Relies on the return code to determine whether a page is valid - echom printf("Testing URL: %d/%d %s", line('.'), line('$'), a:url) - call system(s:command1 .. shellescape(a:url) .. s:command2) - return printf("%s %d", a:url, v:shell_error) -endfunc - -call Test_check_URLs() - -" vim: sw=2 sts=2 et diff --git a/scripts/lintdoc.lua b/scripts/lintdoc.lua index d246c51176..a81dbd0d74 100755 --- a/scripts/lintdoc.lua +++ b/scripts/lintdoc.lua @@ -16,7 +16,7 @@ print('Running lintdoc ...') vim.cmd [[ packadd netrw ]] vim.cmd [[ helptags ALL ]] -require('src.gen.gen_help_html').run_validate() +require('src.gen.gen_help_html').run_validate(nil, _G.arg[1] ~= nil) require('src.gen.gen_help_html').test_gen() print('lintdoc PASSED.') diff --git a/scripts/vim-patch.sh b/scripts/vim-patch.sh index eda530e10a..2cef123c93 100755 --- a/scripts/vim-patch.sh +++ b/scripts/vim-patch.sh @@ -208,7 +208,7 @@ preprocess_patch() { 2>/dev/null $nvim --cmd 'set dir=/tmp' +'g@^diff --git [ab]/runtime/\<\%('"${na_rt}"'\)\>@exe "norm! d/\\v(^diff)|%$\r"' +w +q "$file" # Remove unwanted Vim doc files. - local na_doc='channel\.txt\|if_cscop\.txt\|netbeans\.txt\|os_\w\+\.txt\|print\.txt\|term\.txt\|testing\.txt\|todo\.txt\|vim9\.txt\|tags' + local na_doc='channel\.txt\|if_cscop\.txt\|netbeans\.txt\|os_\w\+\.txt\|print\.txt\|term\.txt\|testing\.txt\|todo\.txt\|vim9\.txt\|tags\|test_urls\.vim' 2>/dev/null $nvim --cmd 'set dir=/tmp' +'g@^diff --git [ab]/runtime/doc/\<\%('"${na_doc}"'\)\>@exe "norm! d/\\v(^diff)|%$\r"' +w +q "$file" # Remove "Last change ..." changes in doc files. @@ -327,10 +327,6 @@ preprocess_patch() { LC_ALL=C sed -Ee 's/( [ab]\/runtime\/doc)\/sponsor\.txt/\1\/intro.txt/g' \ "$file" > "$file".tmp && mv "$file".tmp "$file" - # Rename test_urls.vim to check_urls.vim - LC_ALL=C sed -Ee 's/( [ab])\/runtime\/doc\/test(_urls\.vim)/\1\/scripts\/check\2/g' \ - "$file" > "$file".tmp && mv "$file".tmp "$file" - # Rename path to check_colors.vim LC_ALL=C sed -Ee 's/( [ab]\/runtime)\/colors\/(tools\/check_colors\.vim)/\1\/\2/g' \ "$file" > "$file".tmp && mv "$file".tmp "$file" diff --git a/src/gen/gen_help_html.lua b/src/gen/gen_help_html.lua index 08ab301eed..96f142ecc3 100644 --- a/src/gen/gen_help_html.lua +++ b/src/gen/gen_help_html.lua @@ -15,6 +15,8 @@ -- USAGE (VALIDATE): -- 1. nvim -V1 -es +"lua require('src.gen.gen_help_html').validate('./runtime/doc')" +q -- - validate() is 10x faster than gen(), so it is used in CI. +-- 2. Check for unreachable URLs: +-- nvim -V1 -es +"lua require('src.gen.gen_help_html').validate('./runtime/doc', true)" +q -- -- SELF-TEST MODE: -- 1. nvim -V1 -es +"lua require('src.gen.gen_help_html')._test()" +q @@ -28,6 +30,7 @@ -- * visit_validate() is the core function used by validate(). -- * Files in `new_layout` will be generated with a "flow" layout instead of preformatted/fixed-width layout. +local pending_urls = 0 local tagmap = nil ---@type table local helpfiles = nil ---@type string[] local invalid_links = {} ---@type table @@ -383,15 +386,26 @@ local function validate_link(node, bufnr, fname) return helppage, tagname, ignored end ---- TODO: port the logic from scripts/check_urls.vim -local function validate_url(text, fname) - local ignored = false - if ignore_errors[vim.fs.basename(fname)] then - ignored = true - elseif text:find('http%:') and not exclude_invalid_urls[text] then - invalid_urls[text] = vim.fs.basename(fname) +local function validate_url(text, fname, check_unreachable) + fname = vim.fs.basename(fname) + local ignored = ignore_errors[fname] or exclude_invalid_urls[text] + if ignored then + return true end - return ignored + if check_unreachable then + vim.net.request(text, { retry = 2 }, function(err, _) + if err then + invalid_urls[text] = fname + end + pending_urls = pending_urls - 1 + end) + pending_urls = pending_urls + 1 + else + if text:find('http%:') then + invalid_urls[text] = fname + end + end + return false end --- Traverses the tree at `root` and checks that |tag| links point to valid helptags. @@ -449,7 +463,7 @@ local function visit_validate(root, level, lang_tree, opt, stats) end elseif node_name == 'url' then local fixed_url, _ = fix_url(trim(text)) - validate_url(fixed_url, opt.fname) + validate_url(fixed_url, opt.fname, opt.request_urls) elseif node_name == 'taglink' or node_name == 'optionlink' then local _, _, _ = validate_link(root, opt.buf, opt.fname) end @@ -811,14 +825,19 @@ end --- --- @param fname string help file to validate --- @param parser_path string? path to non-default vimdoc.so +--- @param request_urls boolean? whether to make requests to the URLs --- @return { invalid_links: number, parse_errors: string[] } -local function validate_one(fname, parser_path) +local function validate_one(fname, parser_path, request_urls) local stats = { parse_errors = {}, } local lang_tree, buf = parse_buf(fname, nil, parser_path) for _, tree in ipairs(lang_tree:trees()) do - visit_validate(tree:root(), 0, tree, { buf = buf, fname = fname }, stats) + visit_validate(tree:root(), 0, tree, { + buf = buf, + fname = fname, + request_urls = request_urls, + }, stats) end lang_tree:destroy() vim.cmd.close() @@ -1484,7 +1503,7 @@ end --- This is 10x faster than gen(), for use in CI. --- --- @return nvim.gen_help_html.validate_result result -function M.validate(help_dir, include, parser_path) +function M.validate(help_dir, include, parser_path, request_urls) vim.validate('help_dir', help_dir, function(d) return vim.fn.isdirectory(vim.fs.normalize(d)) == 1 end, 'valid directory') @@ -1501,7 +1520,7 @@ function M.validate(help_dir, include, parser_path) for _, f in ipairs(helpfiles) do local helpfile = vim.fs.basename(f) - local rv = validate_one(f, parser_path) + local rv = validate_one(f, parser_path, request_urls) print(('validated (%-4s errors): %s'):format(#rv.parse_errors, helpfile)) if #rv.parse_errors > 0 then files_to_errors[helpfile] = rv.parse_errors @@ -1512,6 +1531,13 @@ function M.validate(help_dir, include, parser_path) err_count = err_count + #rv.parse_errors end + -- Requests are async, wait for them to finish. + -- TODO(yochem): `:cancel()` tasks after #36146 + vim.wait(20000, function() + return pending_urls <= 0 + end) + ok(pending_urls <= 0, 'pending url checks', pending_urls) + ---@type nvim.gen_help_html.validate_result return { helpfiles = #helpfiles, @@ -1531,11 +1557,12 @@ end --- 3. File a parser bug, and adjust the tolerance of this test in the meantime. --- --- @param help_dir? string e.g. '$VIMRUNTIME/doc' or './runtime/doc' -function M.run_validate(help_dir) +--- @param request_urls? boolean make network requests to check if the URLs are reachable. +function M.run_validate(help_dir, request_urls) help_dir = vim.fs.normalize(help_dir or '$VIMRUNTIME/doc') print('doc path = ' .. vim.uv.fs_realpath(help_dir)) - local rv = M.validate(help_dir) + local rv = M.validate(help_dir, nil, nil, request_urls) -- Check that we actually found helpfiles. ok(rv.helpfiles > 100, '>100 :help files', rv.helpfiles) diff --git a/src/nvim/CMakeLists.txt b/src/nvim/CMakeLists.txt index 3fbed68bf2..c9a354ddbd 100644 --- a/src/nvim/CMakeLists.txt +++ b/src/nvim/CMakeLists.txt @@ -999,3 +999,9 @@ add_target(lintdoc DEPENDS ${DOCFILES} CUSTOM_COMMAND_ARGS USES_TERMINAL) add_dependencies(lintdoc nvim) + +add_target(lintdocurls + COMMAND ${NVIM_LUA} scripts/lintdoc.lua true + DEPENDS ${DOCFILES} + CUSTOM_COMMAND_ARGS USES_TERMINAL) +add_dependencies(lintdocurls nvim)