Markdown: allow URLs to end with balanced parentheses (#18321)

* Markdown: allow URLs to end with balanced parentheses

* Update lib/packages/docutils/rst.nim

Co-authored-by: Timothee Cour <timothee.cour2@gmail.com>

* apply suggestion

* remove unnecessary if

Co-authored-by: Timothee Cour <timothee.cour2@gmail.com>
This commit is contained in:
Andrey Makarov
2021-06-23 08:50:05 +03:00
committed by GitHub
parent 5badeea170
commit 9c43f05099
2 changed files with 75 additions and 2 deletions

View File

@@ -1246,15 +1246,43 @@ proc isUrl(p: RstParser, i: int): bool =
p.tok[i+3].kind == tkWord and
p.tok[i].symbol in ["http", "https", "ftp", "telnet", "file"]
proc checkParen(token: Token, parensStack: var seq[char]): bool {.inline.} =
  ## Tracks bracket nesting while scanning a link and reports whether
  ## `token` closes a bracket that was opened earlier.
  ##
  ## * An opening `(`, `[` or `{` is pushed onto `parensStack`; the result
  ##   is `false`.
  ## * A closing `)`, `]` or `}` searches `parensStack` from the top for
  ##   the nearest opener of the same kind. If one is found, the stack is
  ##   truncated to just below it and the result is `true`; otherwise the
  ##   stack is left untouched and the result is `false`.
  ## * Any other token yields `false`.
  ##
  ## This follows the Markdown balanced parentheses rule
  ## (https://spec.commonmark.org/0.29/#link-destination), which permits
  ## links such as
  ## https://en.wikipedia.org/wiki/APL_(programming_language).
  ## The same rule is applied to RST as well.
  if token.kind != tkPunct:
    return false
  let ch = token.symbol[0]
  case ch
  of '(', '[', '{':
    # opening bracket: remember it
    parensStack.add ch
  of ')', ']', '}':
    # closing bracket: find which opener it pairs with
    let opener =
      case ch
      of ')': '('
      of ']': '['
      else: '{'
    # Walk down from the top of the stack. Openers sitting above the
    # match are dropped together with it, so input like `([)` is
    # accepted and the stray `[` is discarded.
    for pos in countdown(parensStack.high, 0):
      if parensStack[pos] == opener:
        parensStack.setLen pos
        return true
  else:
    discard
  return false
proc parseUrl(p: var RstParser): PRstNode =
## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks
result = newRstNode(rnStandaloneHyperlink)
var lastIdx = p.idx
var closedParenIdx = p.idx - 1 # for balanced parens rule
var parensStack: seq[char]
while p.tok[lastIdx].kind in {tkWord, tkPunct, tkOther}:
let isClosing = checkParen(p.tok[lastIdx], parensStack)
if isClosing:
closedParenIdx = lastIdx
inc lastIdx
dec lastIdx
# standalone URL can not end with punctuation in RST
while lastIdx >= p.idx and p.tok[lastIdx].kind == tkPunct and
while lastIdx > closedParenIdx and p.tok[lastIdx].kind == tkPunct and
p.tok[lastIdx].symbol != "/":
dec lastIdx
var s = ""
@@ -1393,11 +1421,15 @@ proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =
var desc, link = ""
var i = p.idx
var parensStack: seq[char]
template parse(endToken, dest) =
parensStack.setLen 0
inc i # skip begin token
while true:
if p.tok[i].kind in {tkEof, tkIndent}: return false
if p.tok[i].symbol == endToken: break
let isClosing = checkParen(p.tok[i], parensStack)
if p.tok[i].symbol == endToken and not isClosing:
break
dest.add p.tok[i].symbol
inc i
inc i # skip end token

View File

@@ -580,3 +580,44 @@ suite "RST inline markup":
rnLeaf ' '
rnLeaf 'end'
""")
test "URL with balanced parentheses (Markdown rule)":
# 2 balanced parens, 1 unbalanced:
check(dedent"""
https://en.wikipedia.org/wiki/APL_((programming_language)))""".toAst ==
dedent"""
rnInner
rnStandaloneHyperlink
rnLeaf 'https://en.wikipedia.org/wiki/APL_((programming_language))'
rnLeaf ')'
""")
# the same for Markdown-style link:
check(dedent"""
[foo [bar]](https://en.wikipedia.org/wiki/APL_((programming_language))))""".toAst ==
dedent"""
rnInner
rnHyperlink
rnLeaf 'foo [bar]'
rnLeaf 'https://en.wikipedia.org/wiki/APL_((programming_language))'
rnLeaf ')'
""")
# unbalanced (here behavior is more RST-like actually):
check(dedent"""
https://en.wikipedia.org/wiki/APL_(programming_language(""".toAst ==
dedent"""
rnInner
rnStandaloneHyperlink
rnLeaf 'https://en.wikipedia.org/wiki/APL_(programming_language'
rnLeaf '('
""")
# unbalanced [, but still acceptable:
check(dedent"""
[my {link example](http://example.com/bracket_(symbol_[))""".toAst ==
dedent"""
rnHyperlink
rnLeaf 'my {link example'
rnLeaf 'http://example.com/bracket_(symbol_[)'
""")