ci: check URL reachability #35593
Problem: scripts/check_urls.vim manually matches URLs in the help pages and then
synchronously checks them via curl/wget/powershell. This is extremely slow
(~5 minutes for Nvim's runtime on my machine) and prone to errors in how the
URLs are matched.

Solution:
- Use Tree-sitter to find the URLs in the help pages and `vim.net.request` to
  check the responses (see the sketch below).
- Add a `lintdocurls` build task and check it in CI (every Friday).
- Reopen a dedicated issue if unreachable URLs are found.
- Drop the old check_urls.vim script.
Commit a468bc573d (parent 098da1fc2c), committed via GitHub.
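
For orientation before the diff: the heart of the change is that validate_url()
now issues an asynchronous `vim.net.request` per URL, and the caller waits on a
pending counter. Below is a minimal standalone sketch of that pattern, not the
actual implementation; the `urls` list is a made-up example, whereas the real
code collects URLs from the vimdoc Tree-sitter parse inside validate_url() and
M.validate().

  -- Sketch only: check a handful of URLs asynchronously and wait for the results.
  -- `urls` is a hypothetical input; gen_help_html.lua gathers URLs via Tree-sitter.
  local pending = 0
  local unreachable = {} ---@type table<string, string>
  local urls = { 'https://neovim.io', 'https://example.invalid' }

  for _, url in ipairs(urls) do
    pending = pending + 1
    vim.net.request(url, { retry = 2 }, function(err, _)
      if err then
        unreachable[url] = err -- request failed: remember the URL and the error
      end
      pending = pending - 1
    end)
  end

  -- Requests are async, so block (up to 20s) until every callback has run.
  vim.wait(20000, function()
    return pending <= 0
  end)
  vim.print(unreachable)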

.github/workflows/lintdocurls.yml (new file, 39 lines)
@@ -0,0 +1,39 @@
+name: lintdoc-urls
+on:
+  schedule:
+    - cron: '22 22 * * 5'
+  workflow_dispatch:
+
+jobs:
+  check-unreachable-urls:
+    runs-on: ubuntu-latest
+    permissions:
+      issues: write
+    env:
+      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+
+      - name: Set up git config
+        run: |
+          git config --global user.name 'marvim'
+          git config --global user.email 'marvim@users.noreply.github.com'
+
+      - uses: ./.github/actions/setup
+
+      - name: Check for unreachable URLs
+        id: unreachable-urls
+        env:
+          run_url: https://github.com/neovim/neovim/actions/runs/${{ github.run_id }}
+        run: |
+          OUT_FILE=$(mktemp)
+          make lintdocurls 2>&1 | sed -n '/invalid URLs/,/^}/p' > $OUT_FILE
+          if [ -n $OUT_FILE -a -s $OUT_FILE ]; then
+            # wrap output in a code block
+            sed -i '1i```' -e '$a```' $OUT_FILE
+            echo "Automatically generated on $(date) from $run_url" >> $OUT_FILE
+            gh issue reopen 36597
+            gh issue edit 36597 --body-file $OUT_FILE
+          fi

Makefile
@@ -136,7 +136,7 @@ functionaltest-lua: | nvim
 	$(CMAKE) --build build --target functionaltest
 
 FORMAT=formatc formatlua format
-LINT=lintlua lintsh lintc clang-analyzer lintcommit lintdoc lint luals
+LINT=lintlua lintsh lintc clang-analyzer lintcommit lintdoc lintdocurls lint luals
 TEST=functionaltest unittest
 generated-sources benchmark $(FORMAT) $(LINT) $(TEST) doc: | build/.ran-cmake
 	$(CMAKE) --build build --target $@

scripts/check_urls.vim (deleted, 89 lines)
@@ -1,89 +0,0 @@
-" Test for URLs in help documents.
-"
-" Opens a new window with all found URLS followed by return code from curl
-" (anything other than 0 means unreachable)
-"
-" Written by Christian Brabandt.
-
-func Test_check_URLs()
-  "20.10.23, added by Restorer
-  if has("win32")
-    let s:outdev = 'nul'
-  else
-    let s:outdev = '/dev/null'
-  endif
-  " Restorer: For Windows users. If "curl" or "wget" is installed on the system
-  " but not in %PATH%, add the full path to them to %PATH% environment variable.
-  if executable('curl')
-    " Note: does not follow redirects!
-    let s:command1 = 'curl --silent --max-time 5 --fail --output ' ..s:outdev.. ' --head '
-    let s:command2 = ""
-  elseif executable('wget')
-    " Note: only allow a couple of redirects
-    let s:command1 = 'wget --quiet -S --spider --max-redirect=2 --timeout=5 --tries=2 -O ' ..s:outdev.. ' '
-    let s:command2 = ""
-  elseif has("win32") "20.10.23, added by Restorer
-    if executable('powershell')
-      if 2 == system('powershell -nologo -noprofile "$psversiontable.psversion.major"')
-        echoerr 'To work in OS Windows requires the program "PowerShell" version 3.0 or higher'
-        return
-      endif
-      let s:command1 =
-        \ "powershell -nologo -noprofile \"{[Net.ServicePointManager]::SecurityProtocol = 'Tls12, Tls11, Tls, Ssl3'};try{(Invoke-WebRequest -MaximumRedirection 2 -TimeoutSec 5 -Uri "
-      let s:command2 = ').StatusCode}catch{exit [int]$Error[0].Exception.Status}"'
-    endif
-  else
-    echoerr 'Only works when "curl" or "wget", or "powershell" is available'
-    return
-  endif
-
-  " Do the testing.
-  set report =999
-  set nomore shm +=s
-
-  let pat='\(https\?\|ftp\)://[^\t* ]\+'
-  exe 'helpgrep' pat
-  helpclose
-
-  let urls = map(getqflist(), 'v:val.text')
-  " do not use submatch(1)!
-  let urls = map(urls, {key, val -> matchstr(val, pat)})
-  " remove examples like user@host (invalid urls)
-  let urls = filter(urls, 'v:val !~ "@"')
-  " Remove example URLs which are invalid
-  let urls = filter(urls, {key, val -> val !~ '\<\(\(my\|some\)\?host\|machine\|hostname\|file\)\>'})
-  new
-  put =urls
-  " remove some more invalid items
-  " empty lines
-  "20.10.23, Restorer: '_' is a little faster, see `:h global`
-  v/./d _
-  " remove # anchors
-  %s/#.*$//e
-  " remove trailing stuff (parenthesis, dot, comma, quotes), but only for HTTP
-  " links
-  g/^h/s#[.),'"`/>][:.,]\?$##
-  g#^[hf]t\?tp:/\(/\?\.*\)$#d _
-  silent! g/ftp://,$/d _
-  silent! g/=$/d _
-  let a = getline(1,'$')
-  let a = uniq(sort(a))
-  %d _
-  call setline(1, a)
-
-  %s/.*/\=TestURL(submatch(0))/
-
-  " highlight the failures
-  /.* \([0-9]*[1-9]\|[0-9]\{2,}\)$
-endfunc
-
-func TestURL(url)
-  " Relies on the return code to determine whether a page is valid
-  echom printf("Testing URL: %d/%d %s", line('.'), line('$'), a:url)
-  call system(s:command1 .. shellescape(a:url) .. s:command2)
-  return printf("%s %d", a:url, v:shell_error)
-endfunc
-
-call Test_check_URLs()
-
-" vim: sw=2 sts=2 et

scripts/lintdoc.lua
@@ -16,7 +16,7 @@ print('Running lintdoc ...')
 vim.cmd [[ packadd netrw ]]
 vim.cmd [[ helptags ALL ]]
 
-require('src.gen.gen_help_html').run_validate()
+require('src.gen.gen_help_html').run_validate(nil, _G.arg[1] ~= nil)
 require('src.gen.gen_help_html').test_gen()
 
 print('lintdoc PASSED.')

scripts/vim-patch.sh
@@ -208,7 +208,7 @@ preprocess_patch() {
   2>/dev/null $nvim --cmd 'set dir=/tmp' +'g@^diff --git [ab]/runtime/\<\%('"${na_rt}"'\)\>@exe "norm! d/\\v(^diff)|%$\r"' +w +q "$file"
 
   # Remove unwanted Vim doc files.
-  local na_doc='channel\.txt\|if_cscop\.txt\|netbeans\.txt\|os_\w\+\.txt\|print\.txt\|term\.txt\|testing\.txt\|todo\.txt\|vim9\.txt\|tags'
+  local na_doc='channel\.txt\|if_cscop\.txt\|netbeans\.txt\|os_\w\+\.txt\|print\.txt\|term\.txt\|testing\.txt\|todo\.txt\|vim9\.txt\|tags\|test_urls\.vim'
   2>/dev/null $nvim --cmd 'set dir=/tmp' +'g@^diff --git [ab]/runtime/doc/\<\%('"${na_doc}"'\)\>@exe "norm! d/\\v(^diff)|%$\r"' +w +q "$file"
 
   # Remove "Last change ..." changes in doc files.
@@ -327,10 +327,6 @@ preprocess_patch() {
   LC_ALL=C sed -Ee 's/( [ab]\/runtime\/doc)\/sponsor\.txt/\1\/intro.txt/g' \
     "$file" > "$file".tmp && mv "$file".tmp "$file"
 
-  # Rename test_urls.vim to check_urls.vim
-  LC_ALL=C sed -Ee 's/( [ab])\/runtime\/doc\/test(_urls\.vim)/\1\/scripts\/check\2/g' \
-    "$file" > "$file".tmp && mv "$file".tmp "$file"
-
   # Rename path to check_colors.vim
   LC_ALL=C sed -Ee 's/( [ab]\/runtime)\/colors\/(tools\/check_colors\.vim)/\1\/\2/g' \
     "$file" > "$file".tmp && mv "$file".tmp "$file"

src/gen/gen_help_html.lua
@@ -15,6 +15,8 @@
 -- USAGE (VALIDATE):
 --   1. nvim -V1 -es +"lua require('src.gen.gen_help_html').validate('./runtime/doc')" +q
 --      - validate() is 10x faster than gen(), so it is used in CI.
+--   2. Check for unreachable URLs:
+--      nvim -V1 -es +"lua require('src.gen.gen_help_html').validate('./runtime/doc', true)" +q
 --
 -- SELF-TEST MODE:
 --   1. nvim -V1 -es +"lua require('src.gen.gen_help_html')._test()" +q
@@ -28,6 +30,7 @@
 -- * visit_validate() is the core function used by validate().
 -- * Files in `new_layout` will be generated with a "flow" layout instead of preformatted/fixed-width layout.
 
+local pending_urls = 0
 local tagmap = nil ---@type table<string, string>
 local helpfiles = nil ---@type string[]
 local invalid_links = {} ---@type table<string, any>
@@ -383,15 +386,26 @@ local function validate_link(node, bufnr, fname)
   return helppage, tagname, ignored
 end
 
---- TODO: port the logic from scripts/check_urls.vim
-local function validate_url(text, fname)
-  local ignored = false
-  if ignore_errors[vim.fs.basename(fname)] then
-    ignored = true
-  elseif text:find('http%:') and not exclude_invalid_urls[text] then
-    invalid_urls[text] = vim.fs.basename(fname)
-  end
-  return ignored
+local function validate_url(text, fname, check_unreachable)
+  fname = vim.fs.basename(fname)
+  local ignored = ignore_errors[fname] or exclude_invalid_urls[text]
+  if ignored then
+    return true
+  end
+  if check_unreachable then
+    vim.net.request(text, { retry = 2 }, function(err, _)
+      if err then
+        invalid_urls[text] = fname
+      end
+      pending_urls = pending_urls - 1
+    end)
+    pending_urls = pending_urls + 1
+  else
+    if text:find('http%:') then
+      invalid_urls[text] = fname
+    end
+  end
+  return false
 end
 
 --- Traverses the tree at `root` and checks that |tag| links point to valid helptags.
@@ -449,7 +463,7 @@ local function visit_validate(root, level, lang_tree, opt, stats)
     end
   elseif node_name == 'url' then
     local fixed_url, _ = fix_url(trim(text))
-    validate_url(fixed_url, opt.fname)
+    validate_url(fixed_url, opt.fname, opt.request_urls)
   elseif node_name == 'taglink' or node_name == 'optionlink' then
     local _, _, _ = validate_link(root, opt.buf, opt.fname)
   end
@@ -811,14 +825,19 @@ end
 ---
 --- @param fname string help file to validate
 --- @param parser_path string? path to non-default vimdoc.so
+--- @param request_urls boolean? whether to make requests to the URLs
 --- @return { invalid_links: number, parse_errors: string[] }
-local function validate_one(fname, parser_path)
+local function validate_one(fname, parser_path, request_urls)
   local stats = {
     parse_errors = {},
   }
   local lang_tree, buf = parse_buf(fname, nil, parser_path)
   for _, tree in ipairs(lang_tree:trees()) do
-    visit_validate(tree:root(), 0, tree, { buf = buf, fname = fname }, stats)
+    visit_validate(tree:root(), 0, tree, {
+      buf = buf,
+      fname = fname,
+      request_urls = request_urls,
+    }, stats)
   end
   lang_tree:destroy()
   vim.cmd.close()
@@ -1484,7 +1503,7 @@ end
 --- This is 10x faster than gen(), for use in CI.
 ---
 --- @return nvim.gen_help_html.validate_result result
-function M.validate(help_dir, include, parser_path)
+function M.validate(help_dir, include, parser_path, request_urls)
   vim.validate('help_dir', help_dir, function(d)
     return vim.fn.isdirectory(vim.fs.normalize(d)) == 1
   end, 'valid directory')
@@ -1501,7 +1520,7 @@ function M.validate(help_dir, include, parser_path)
 
   for _, f in ipairs(helpfiles) do
     local helpfile = vim.fs.basename(f)
-    local rv = validate_one(f, parser_path)
+    local rv = validate_one(f, parser_path, request_urls)
     print(('validated (%-4s errors): %s'):format(#rv.parse_errors, helpfile))
     if #rv.parse_errors > 0 then
       files_to_errors[helpfile] = rv.parse_errors
@@ -1512,6 +1531,13 @@ function M.validate(help_dir, include, parser_path)
     err_count = err_count + #rv.parse_errors
   end
 
+  -- Requests are async, wait for them to finish.
+  -- TODO(yochem): `:cancel()` tasks after #36146
+  vim.wait(20000, function()
+    return pending_urls <= 0
+  end)
+  ok(pending_urls <= 0, 'pending url checks', pending_urls)
+
   ---@type nvim.gen_help_html.validate_result
   return {
     helpfiles = #helpfiles,
@@ -1531,11 +1557,12 @@ end
 --- 3. File a parser bug, and adjust the tolerance of this test in the meantime.
 ---
 --- @param help_dir? string e.g. '$VIMRUNTIME/doc' or './runtime/doc'
-function M.run_validate(help_dir)
+--- @param request_urls? boolean make network requests to check if the URLs are reachable.
+function M.run_validate(help_dir, request_urls)
   help_dir = vim.fs.normalize(help_dir or '$VIMRUNTIME/doc')
   print('doc path = ' .. vim.uv.fs_realpath(help_dir))
 
-  local rv = M.validate(help_dir)
+  local rv = M.validate(help_dir, nil, nil, request_urls)
 
   -- Check that we actually found helpfiles.
   ok(rv.helpfiles > 100, '>100 :help files', rv.helpfiles)

CMakeLists.txt
@@ -999,3 +999,9 @@ add_target(lintdoc
   DEPENDS ${DOCFILES}
   CUSTOM_COMMAND_ARGS USES_TERMINAL)
 add_dependencies(lintdoc nvim)
+
+add_target(lintdocurls
+  COMMAND ${NVIM_LUA} scripts/lintdoc.lua true
+  DEPENDS ${DOCFILES}
+  CUSTOM_COMMAND_ARGS USES_TERMINAL)
+add_dependencies(lintdocurls nvim)
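
Locally, the same check runs with `make lintdocurls`: the target above executes
scripts/lintdoc.lua with an extra argument, which makes run_validate() pass
request_urls=true down to validate_url(), so each URL found in the help pages is
actually requested via vim.net.request rather than only being checked for the
insecure "http:" scheme.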