Fix nim-lang/nimforum#285 - punctuation after URL (#17908)

* Fix nim-lang/nimforum#285 - punctuation after URL
* keep only one leaf in a rnStandaloneHyperlink
* add more complex URL
This commit is contained in:
Andrey Makarov
2021-05-01 08:20:33 +03:00
committed by GitHub
parent 34a09574ce
commit abb8a73134
2 changed files with 67 additions and 11 deletions

View File

@@ -1213,23 +1213,29 @@ proc isUrl(p: RstParser, i: int): bool =
p.tok[i+3].kind == tkWord and
p.tok[i].symbol in ["http", "https", "ftp", "telnet", "file"]
proc parseUrl(p: var RstParser): PRstNode =
  ## Parses a standalone hyperlink that starts at the current token and
  ## returns it as an `rnStandaloneHyperlink` node holding the whole URL
  ## text as one child. On return `p.idx` points at the first token that
  ## is not part of the URL.
  ##
  ## See
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks
  let firstIdx = p.idx
  # Walk forward over every token kind that may appear inside a URL;
  # `stopIdx` ends up one past the last such token.
  var stopIdx = firstIdx
  while p.tok[stopIdx].kind in {tkWord, tkPunct, tkOther}:
    inc stopIdx
  # A standalone URL can not end with punctuation in RST, so step back
  # over trailing punctuation tokens; a final "/" is kept in the URL.
  var endIdx = stopIdx - 1
  while endIdx >= firstIdx and p.tok[endIdx].kind == tkPunct and
      p.tok[endIdx].symbol != "/":
    dec endIdx
  # Join the symbols of the remaining tokens into a single URL string.
  var url = ""
  for tokIdx in firstIdx .. endIdx:
    url.add p.tok[tokIdx].symbol
  result = newRstNode(rnStandaloneHyperlink)
  result.add url
  # Continue parsing right after the URL; any punctuation that was
  # stepped over above is left for the caller to handle.
  p.idx = endIdx + 1
proc parseWordOrRef(p: var RstParser, father: PRstNode) =
## Parses a normal word or may be a reference or URL.
if nextTok(p).kind != tkPunct: # <- main path, a normal word
father.add newLeaf(p)
inc p.idx
elif isUrl(p, p.idx): # URL http://something
var n = newRstNode(rnStandaloneHyperlink)
while true:
case currentTok(p).kind
of tkWord, tkAdornment, tkOther: discard
of tkPunct:
if nextTok(p).kind notin {tkWord, tkAdornment, tkOther, tkPunct}:
break
else: break
n.add(newLeaf(p))
inc p.idx
father.add(n)
father.add parseUrl(p)
else:
# check for reference (probably, long one like some.ref.with.dots_ )
var saveIdx = p.idx

View File

@@ -428,3 +428,53 @@ suite "RST inline markup":
rnLeaf 'lnk'
rnLeaf '___'
""")
test "no punctuation in the end of a standalone URI is allowed":
# Each check compares the AST dump of a one-line RST input (`toAst`)
# with the expected tree, focusing on where the standalone hyperlink ends.
#
# Case 1: the closing `)`, `]` and `,` that follow the URL are expected
# as separate leaves, not as part of the hyperlink.
check(dedent"""
[see (http://no.org)], end""".toAst ==
dedent"""
rnInner
rnLeaf '['
rnLeaf 'see'
rnLeaf ' '
rnLeaf '('
rnStandaloneHyperlink
rnLeaf 'http://no.org'
rnLeaf ')'
rnLeaf ']'
rnLeaf ','
rnLeaf ' '
rnLeaf 'end'
""")
# Case 2: a `/` at the end is OK and is expected to stay inside the
# hyperlink.
check(
dedent"""
See http://no.org/ end""".toAst ==
dedent"""
rnInner
rnLeaf 'See'
rnLeaf ' '
rnStandaloneHyperlink
rnLeaf 'http://no.org/'
rnLeaf ' '
rnLeaf 'end'
""")
# Case 3: a more complex URL with a made-up ending `&=`; the expected
# tree has `&` and `=` as separate leaves after the hyperlink.
# GitHub Markdown would include the final `&=`, and so would
# rst2html.py, in contradiction with the RST spec.
check(
dedent"""
See https://www.google.com/url?sa=t&source=web&cd=&cad=rja&url=https%3A%2F%2Fnim-lang.github.io%2FNim%2Frst.html%23features&usg=AO&= end""".toAst ==
dedent"""
rnInner
rnLeaf 'See'
rnLeaf ' '
rnStandaloneHyperlink
rnLeaf 'https://www.google.com/url?sa=t&source=web&cd=&cad=rja&url=https%3A%2F%2Fnim-lang.github.io%2FNim%2Frst.html%23features&usg=AO'
rnLeaf '&'
rnLeaf '='
rnLeaf ' '
rnLeaf 'end'
""")