From ce8cf22af9be12b43e6eb0ce98c1e96ccefefba8 Mon Sep 17 00:00:00 2001 From: bircni Date: Sun, 28 Jun 2026 13:37:16 +0200 Subject: [PATCH] fix(actions): don't swallow HTML entities into linkified URLs (#38239) In the Actions log viewer, a double-quoted URL renders with a stray extra `;` after it. Reported in `gitea/runner#1046` Remove the buggy AI slop `linkifyURLs` and use new approach to process URLs in text --------- Signed-off-by: wxiaoguang Co-authored-by: wxiaoguang --- web_src/js/components/ActionRunView.ts | 6 +-- web_src/js/modules/codeeditor/utils.test.ts | 39 +------------- web_src/js/modules/codeeditor/utils.ts | 18 ++----- web_src/js/render/ansi.test.ts | 15 ++++-- web_src/js/render/ansi.ts | 43 +++++++++++++-- web_src/js/utils/url.test.ts | 59 ++++++++++++--------- web_src/js/utils/url.ts | 41 +++++--------- 7 files changed, 104 insertions(+), 117 deletions(-) diff --git a/web_src/js/components/ActionRunView.ts b/web_src/js/components/ActionRunView.ts index 540274dfb3d..52fcdf56cdd 100644 --- a/web_src/js/components/ActionRunView.ts +++ b/web_src/js/components/ActionRunView.ts @@ -1,5 +1,5 @@ import {createElementFromAttrs} from '../utils/dom.ts'; -import {renderAnsi} from '../render/ansi.ts'; +import {renderAnsiInto} from '../render/ansi.ts'; import {reactive} from 'vue'; import type {ActionsArtifact, ActionsJob, ActionsRun, ActionsStatus} from '../modules/gitea-actions.ts'; import type {IntervalId} from '../types.ts'; @@ -80,10 +80,10 @@ export function createLogLineMessage(line: LogLine, cmd: LogLineCommand | null) if (label) { logMsg.append(createElementFromAttrs('span', {class: 'log-msg-label'}, `${label}:`)); const msgSpan = document.createElement('span'); - msgSpan.innerHTML = ` ${renderAnsi(msgContent.trimStart())}`; + renderAnsiInto(msgSpan, ` ${msgContent.trimStart()}`); logMsg.append(msgSpan); } else { - logMsg.innerHTML = renderAnsi(msgContent); + renderAnsiInto(logMsg, msgContent); } return logMsg; } diff --git a/web_src/js/modules/codeeditor/utils.test.ts b/web_src/js/modules/codeeditor/utils.test.ts index 2bf3e74967a..dd903fa78c8 100644 --- a/web_src/js/modules/codeeditor/utils.test.ts +++ b/web_src/js/modules/codeeditor/utils.test.ts @@ -1,41 +1,4 @@ -import {findUrlAtPosition, trimUrlPunctuation, urlRawRegex} from './utils.ts'; - -function matchUrls(text: string): string[] { - return Array.from(text.matchAll(urlRawRegex), (m) => trimUrlPunctuation(m[0])); -} - -test('matchUrls', () => { - expect(matchUrls('visit https://example.com for info')).toEqual(['https://example.com']); - expect(matchUrls('see https://example.com.')).toEqual(['https://example.com']); - expect(matchUrls('see https://example.com, and')).toEqual(['https://example.com']); - expect(matchUrls('see https://example.com; and')).toEqual(['https://example.com']); - expect(matchUrls('(https://example.com)')).toEqual(['https://example.com']); - expect(matchUrls('"https://example.com"')).toEqual(['https://example.com']); - expect(matchUrls('https://example.com/path?q=1&b=2#hash')).toEqual(['https://example.com/path?q=1&b=2#hash']); - expect(matchUrls('https://example.com/path?q=1&b=2#hash.')).toEqual(['https://example.com/path?q=1&b=2#hash']); - expect(matchUrls('https://x.co')).toEqual(['https://x.co']); - expect(matchUrls('https://example.com/path_(wiki)')).toEqual(['https://example.com/path_(wiki)']); - expect(matchUrls('https://en.wikipedia.org/wiki/Rust_(programming_language)')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']); - expect(matchUrls('(https://en.wikipedia.org/wiki/Rust_(programming_language))')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']); - expect(matchUrls('http://example.com')).toEqual(['http://example.com']); - expect(matchUrls('no url here')).toEqual([]); - expect(matchUrls('https://a.com and https://b.com')).toEqual(['https://a.com', 'https://b.com']); - expect(matchUrls('[![](https://img.shields.io/npm/v/pkg.svg?style=flat)](https://www.npmjs.org/package/pkg)')).toEqual(['https://img.shields.io/npm/v/pkg.svg?style=flat', 'https://www.npmjs.org/package/pkg']); -}); - -test('trimUrlPunctuation', () => { - expect(trimUrlPunctuation('https://example.com.')).toEqual('https://example.com'); - expect(trimUrlPunctuation('https://example.com,')).toEqual('https://example.com'); - expect(trimUrlPunctuation('https://example.com;')).toEqual('https://example.com'); - expect(trimUrlPunctuation('https://example.com:')).toEqual('https://example.com'); - expect(trimUrlPunctuation("https://example.com'")).toEqual('https://example.com'); - expect(trimUrlPunctuation('https://example.com"')).toEqual('https://example.com'); - expect(trimUrlPunctuation('https://example.com.,;')).toEqual('https://example.com'); - expect(trimUrlPunctuation('https://example.com/path')).toEqual('https://example.com/path'); - expect(trimUrlPunctuation('https://example.com/path_(wiki)')).toEqual('https://example.com/path_(wiki)'); - expect(trimUrlPunctuation('https://example.com)')).toEqual('https://example.com'); - expect(trimUrlPunctuation('https://en.wikipedia.org/wiki/Rust_(lang))')).toEqual('https://en.wikipedia.org/wiki/Rust_(lang)'); -}); +import {findUrlAtPosition} from './utils.ts'; test('findUrlAtPosition', () => { const doc = 'visit https://example.com for info'; diff --git a/web_src/js/modules/codeeditor/utils.ts b/web_src/js/modules/codeeditor/utils.ts index a40cf191fe0..108c2a5b495 100644 --- a/web_src/js/modules/codeeditor/utils.ts +++ b/web_src/js/modules/codeeditor/utils.ts @@ -1,5 +1,6 @@ import type {EditorView, ViewUpdate} from '@codemirror/view'; import type {CodemirrorModules} from './main.ts'; +import {trimUrlPunctuation, urlRawRegex} from '../../utils/url.ts'; /** Remove trailing whitespace from all lines in the editor. */ export function trimTrailingWhitespaceFromView(view: EditorView): void { @@ -15,22 +16,9 @@ export function trimTrailingWhitespaceFromView(view: EditorView): void { if (changes.length) view.dispatch({changes}); } -/** Matches URLs, excluding characters that are never valid unencoded in URLs per RFC 3986. */ -export const urlRawRegex = /\bhttps?:\/\/[^\s<>[\]]+/gi; - -/** Strip trailing punctuation that is likely not part of the URL. */ -export function trimUrlPunctuation(url: string): string { - url = url.replace(/[.,;:'"]+$/, ''); - // Strip trailing closing parens only if unbalanced (not part of the URL like Wikipedia links) - while (url.endsWith(')') && (url.match(/\(/g) || []).length < (url.match(/\)/g) || []).length) { - url = url.slice(0, -1); - } - return url; -} - /** Find the URL at the given character position in a document string, or null if none. */ export function findUrlAtPosition(doc: string, pos: number): string | null { - for (const match of doc.matchAll(urlRawRegex)) { + for (const match of doc.matchAll(urlRawRegex())) { const url = trimUrlPunctuation(match[0]); if (match.index !== undefined && pos >= match.index && pos < match.index + url.length) { return url; @@ -67,7 +55,7 @@ export function goToDefinitionAt(cm: CodemirrorModules, view: EditorView, pos: n export function clickableUrls(cm: CodemirrorModules) { const urlMark = cm.view.Decoration.mark({class: 'cm-url'}); const urlDecorator = new cm.view.MatchDecorator({ - regexp: urlRawRegex, + regexp: urlRawRegex(), decorate: (add, from, _to, match) => { const trimmed = trimUrlPunctuation(match[0]); add(from, from + trimmed.length, urlMark); diff --git a/web_src/js/render/ansi.test.ts b/web_src/js/render/ansi.test.ts index 2d9b8ede008..e7e7af8928b 100644 --- a/web_src/js/render/ansi.test.ts +++ b/web_src/js/render/ansi.test.ts @@ -1,6 +1,12 @@ -import {renderAnsi} from './ansi.ts'; +import {renderAnsiInto} from './ansi.ts'; test('renderAnsi', () => { + const renderAnsi = (line: string) => { + const el = document.createElement('div'); + renderAnsiInto(el, line); + return el.innerHTML; + }; + expect(renderAnsi('abc')).toEqual('abc'); expect(renderAnsi('abc\n')).toEqual('abc'); expect(renderAnsi('abc\r\n')).toEqual('abc'); @@ -20,6 +26,9 @@ test('renderAnsi', () => { // URLs in ANSI output become clickable links const link = (url: string) => `${url}`; - expect(renderAnsi('Downloading https://github.com/actions/upload-artifact/releases')).toEqual(`Downloading ${link('https://github.com/actions/upload-artifact/releases')}`); - expect(renderAnsi('\x1b[32mhttps://proxy.golang.org/cached-only\x1b[0m')).toEqual(`${link('https://proxy.golang.org/cached-only')}`); + expect(renderAnsi('foo https://example.com bar')).toEqual(`foo ${link('https://example.com')} bar`); + expect(renderAnsi('')).toEqual(`<${link('https://example.com?a=b&c=d#h')}>`); + expect(renderAnsi('open https://example.com.')).toEqual(`open ${link('https://example.com')}.`); + expect(renderAnsi('"https://example.com"')).toEqual(`"${link('https://example.com')}"`); + expect(renderAnsi('\x1b[32mhttps://example.com\x1b[0m')).toEqual(`${link('https://example.com')}`); }); diff --git a/web_src/js/render/ansi.ts b/web_src/js/render/ansi.ts index 4625e542333..abab3a212ce 100644 --- a/web_src/js/render/ansi.ts +++ b/web_src/js/render/ansi.ts @@ -1,5 +1,5 @@ import {AnsiUp} from 'ansi_up'; -import {linkifyURLs} from '../utils/url.ts'; +import {trimUrlPunctuation, urlRawRegex} from '../utils/url.ts'; const replacements: Array<[RegExp, string]> = [ [/\x1b\[\d+[A-H]/g, ''], // Move cursor, treat them as no-op @@ -7,7 +7,7 @@ const replacements: Array<[RegExp, string]> = [ ]; // render ANSI to HTML -export function renderAnsi(line: string): string { +export function renderAnsiInto(el: HTMLElement, line: string) { // create a fresh ansi_up instance because otherwise previous renders can influence // the output of future renders, because ansi_up is stateful and remembers things like // unclosed opening tags for colors. @@ -44,5 +44,42 @@ export function renderAnsi(line: string): string { result = lines.join('\n'); } - return linkifyURLs(result); + el.innerHTML = result; + // at the moment, only need to do post-process when there are potential URL links + if (result.includes('://')) renderAnsiPostProcessNode(el); +} + +function renderAnsiProcessText(node: ChildNode): ChildNode { + const text = node.textContent!; + const match = urlRawRegex().exec(text); + if (!match || match.index === undefined) return node; + + const before = text.slice(0, match.index); + const urlMatched = match[0]; + const urlTrimmed = trimUrlPunctuation(urlMatched); + const after = text.slice(match.index + urlMatched.length - (urlMatched.length - urlTrimmed.length)); + + const link = document.createElement('a'); + link.setAttribute('href', urlTrimmed); + link.setAttribute('target', '_blank'); + link.textContent = urlTrimmed; + + const newNodes: Array = []; + if (before) newNodes.push(before); + newNodes.push(link); + if (after) newNodes.push(after); + + node.replaceWith(...newNodes); + return link; +} + +function renderAnsiPostProcessNode(el: ChildNode) { + for (let node = el.firstChild; node; node = node.nextSibling) { + if (node.nodeName === 'A') continue; + if (node.nodeType !== Node.TEXT_NODE) { + renderAnsiPostProcessNode(node); + continue; + } + node = renderAnsiProcessText(node); + } } diff --git a/web_src/js/utils/url.test.ts b/web_src/js/utils/url.test.ts index 0a7e27ca619..580b03eccc4 100644 --- a/web_src/js/utils/url.test.ts +++ b/web_src/js/utils/url.test.ts @@ -1,4 +1,4 @@ -import {linkifyURLs, pathEscape, pathEscapeSegments, urlQueryEscape} from './url.ts'; +import {pathEscape, pathEscapeSegments, trimUrlPunctuation, urlQueryEscape, urlRawRegex} from './url.ts'; describe('escape', () => { const queryNonAscii = " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"; @@ -19,29 +19,36 @@ describe('escape', () => { }); }); -test('linkifyURLs', () => { - const link = (url: string) => `${url}`; - expect(linkifyURLs('https://example.com')).toEqual(link('https://example.com')); - expect(linkifyURLs('https://dl.google.com/go/go1.23.6.linux-amd64.tar.gz')).toEqual(link('https://dl.google.com/go/go1.23.6.linux-amd64.tar.gz')); - expect(linkifyURLs('https://example.com/path?query=1&b=2#frag')).toEqual(link('https://example.com/path?query=1&b=2#frag')); - expect(linkifyURLs('visit https://example.com/repo for info')).toEqual(`visit ${link('https://example.com/repo')} for info`); - expect(linkifyURLs('See https://example.com.')).toEqual(`See ${link('https://example.com')}.`); - expect(linkifyURLs('https://example.com, and more')).toEqual(`${link('https://example.com')}, and more`); - expect(linkifyURLs('https://proxy.golang.org/cached-only')).toEqual(`${link('https://proxy.golang.org/cached-only')}`); - expect(linkifyURLs('https://registry.npmjs.org/@types/node')).toEqual(`${link('https://registry.npmjs.org/@types/node')}`); - expect(linkifyURLs('https://a.com and https://b.org')).toEqual(`${link('https://a.com')} and ${link('https://b.org')}`); - expect(linkifyURLs('no urls here')).toEqual('no urls here'); - expect(linkifyURLs('http://example.com/path')).toEqual(link('http://example.com/path')); - expect(linkifyURLs('http://localhost:3000/repo')).toEqual(link('http://localhost:3000/repo')); - expect(linkifyURLs('https://')).toEqual('https://'); - expect(linkifyURLs('Click here')).toEqual('Click here'); - expect(linkifyURLs('Click here')).toEqual('Click here'); - expect(linkifyURLs('https://example.com')).toEqual('https://example.com'); - expect(linkifyURLs('https://evil.com/')).toEqual(`${link('https://evil.com/')}`); - expect(linkifyURLs('https://evil.com/"onmouseover="alert(1)')).toEqual(`${link('https://evil.com/')}"onmouseover="alert(1)`); - expect(linkifyURLs('javascript:alert(1)')).toEqual('javascript:alert(1)'); // eslint-disable-line no-script-url - expect(linkifyURLs("https://evil.com/'onclick='alert(1)")).toEqual(`${link('https://evil.com/')}'onclick='alert(1)`); - expect(linkifyURLs('data:text/html,')).toEqual('data:text/html,'); - expect(linkifyURLs('https://evil.com/\nonclick=alert(1)')).toEqual(`${link('https://evil.com/')}\nonclick=alert(1)`); - expect(linkifyURLs('https://evil.com/"onmouseover=alert(1)')).toEqual(`${link('https://evil.com/"onmouseover=alert')}(1)`); +test('matchUrls', () => { + const matchUrls = (text: string) => Array.from(text.matchAll(urlRawRegex()), (m) => trimUrlPunctuation(m[0])); + expect(matchUrls('visit https://example.com for info')).toEqual(['https://example.com']); + expect(matchUrls('see https://example.com.')).toEqual(['https://example.com']); + expect(matchUrls('see https://example.com, and')).toEqual(['https://example.com']); + expect(matchUrls('see https://example.com; and')).toEqual(['https://example.com']); + expect(matchUrls('(https://example.com)')).toEqual(['https://example.com']); + expect(matchUrls('"https://example.com"')).toEqual(['https://example.com']); + expect(matchUrls('https://example.com/path?q=1&b=2#hash')).toEqual(['https://example.com/path?q=1&b=2#hash']); + expect(matchUrls('https://example.com/path?q=1&b=2#hash.')).toEqual(['https://example.com/path?q=1&b=2#hash']); + expect(matchUrls('https://x.co')).toEqual(['https://x.co']); + expect(matchUrls('https://example.com/path_(wiki)')).toEqual(['https://example.com/path_(wiki)']); + expect(matchUrls('https://en.wikipedia.org/wiki/Rust_(programming_language)')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']); + expect(matchUrls('(https://en.wikipedia.org/wiki/Rust_(programming_language))')).toEqual(['https://en.wikipedia.org/wiki/Rust_(programming_language)']); + expect(matchUrls('http://example.com')).toEqual(['http://example.com']); + expect(matchUrls('no url here')).toEqual([]); + expect(matchUrls('https://a.com and https://b.com')).toEqual(['https://a.com', 'https://b.com']); + expect(matchUrls('[![](https://img.shields.io/npm/v/pkg.svg?style=flat)](https://www.npmjs.org/package/pkg)')).toEqual(['https://img.shields.io/npm/v/pkg.svg?style=flat', 'https://www.npmjs.org/package/pkg']); +}); + +test('trimUrlPunctuation', () => { + expect(trimUrlPunctuation('https://example.com.')).toEqual('https://example.com'); + expect(trimUrlPunctuation('https://example.com,')).toEqual('https://example.com'); + expect(trimUrlPunctuation('https://example.com;')).toEqual('https://example.com'); + expect(trimUrlPunctuation('https://example.com:')).toEqual('https://example.com'); + expect(trimUrlPunctuation("https://example.com'")).toEqual('https://example.com'); + expect(trimUrlPunctuation('https://example.com"')).toEqual('https://example.com'); + expect(trimUrlPunctuation('https://example.com.,;')).toEqual('https://example.com'); + expect(trimUrlPunctuation('https://example.com/path')).toEqual('https://example.com/path'); + expect(trimUrlPunctuation('https://example.com/path_(wiki)')).toEqual('https://example.com/path_(wiki)'); + expect(trimUrlPunctuation('https://example.com)')).toEqual('https://example.com'); + expect(trimUrlPunctuation('https://en.wikipedia.org/wiki/Rust_(lang))')).toEqual('https://en.wikipedia.org/wiki/Rust_(lang)'); }); diff --git a/web_src/js/utils/url.ts b/web_src/js/utils/url.ts index 06e1aa29511..441c1e26e13 100644 --- a/web_src/js/utils/url.ts +++ b/web_src/js/utils/url.ts @@ -1,4 +1,15 @@ -import {html, htmlRaw} from './html.ts'; +/** Matches URLs, excluding characters that are never valid unencoded in URLs per RFC 3986. */ +export const urlRawRegex = () => /\bhttps?:\/\/[^\s<>[\]]+/gi; // JS regexp has internal states, so always use a new instance + +/** Strip trailing punctuation that is likely not part of the URL. */ +export function trimUrlPunctuation(url: string): string { + url = url.replace(/[.,;:'"]+$/, ''); + // Strip trailing closing parens only if unbalanced (not part of the URL like Wikipedia links) + while (url.endsWith(')') && (url.match(/\(/g) || []).length < (url.match(/\)/g) || []).length) { + url = url.slice(0, -1); + } + return url; +} export function urlQueryEscape(s: string) { // See "TestQueryEscape" in backend @@ -31,31 +42,3 @@ export function pathEscapeSegments(s: string): string { // The same as backend's PathEscapeSegments return s.split('/').map(pathEscape).join('/'); } - -// Match HTML tags (to skip) or URLs (to linkify) in HTML content -const urlLinkifyPattern = /(<([-\w]+)[^>]*>)|(<\/([-\w]+)[^>]*>)|(https?:\/\/[^\s<>"'`|(){}[\]]+)/gi; -const trailingPunctPattern = /[.,;:!?]+$/; - -// Convert URLs to clickable links in HTML, preserving existing HTML tags -export function linkifyURLs(htmlString: string): string { - let inAnchor = false; - return htmlString.replace(urlLinkifyPattern, (match, _openTagFull, openTag, _closeTagFull, closeTag, url) => { - // skip URLs inside existing tags - if (openTag === 'a') { - inAnchor = true; - return match; - } else if (closeTag === 'a') { - inAnchor = false; - return match; - } - if (inAnchor || !url) { - return match; - } - - const trailingPunct = url.match(trailingPunctPattern); - const cleanUrl = trailingPunct ? url.slice(0, -trailingPunct[0].length) : url; - const trailing = trailingPunct ? trailingPunct[0] : ''; - // safe because regexp only matches valid URLs (no quotes or angle brackets) - return html`${htmlRaw(cleanUrl)}${htmlRaw(trailing)}`; - }); -}