docs: make inline markup more compatible with Markdown (#18053)

fixes https://github.com/timotheecour/Nim/issues/739
This commit is contained in:
Andrey Makarov
2021-05-21 07:54:20 +03:00
committed by GitHub
parent 6a5973882b
commit 9f7e2e3057
6 changed files with 168 additions and 62 deletions

View File

@@ -146,7 +146,7 @@ character meaning
After ``\x``, from zero to two hexadecimal digits are read (letters can be in
upper or lower case). In UTF-8 mode, any number of hexadecimal digits may
appear between ``\x{`` and ``}``, but the value of the character code must be
less than 2**31 (that is, the maximum hexadecimal value is 7FFFFFFF). If
less than 2^31 (that is, the maximum hexadecimal value is 7FFFFFFF). If
characters other than hexadecimal digits appear between ``\x{`` and ``}``, or
if there is no terminating ``}``, this form of escape is not recognized.
Instead, the initial ``\x`` will be interpreted as a basic hexadecimal escape,

View File

@@ -152,7 +152,7 @@
## Instead, a `seq[string]` is returned for each row.
##
## The reasoning is as follows:
## 1. it's close to what many DBs offer natively (char**)
## 1. it's close to what many DBs offer natively (`char**`:c:)
## 2. it hides the number of types that the DB supports
## (int? int64? decimal up to 10 places? geo coords?)
## 3. it's convenient when all you do is to forward the data to somewhere else (echo, log, put the data into a new query)

View File

@@ -130,6 +130,32 @@
## .. warning:: Using Nim-specific features can cause other RST implementations
## to fail on your document.
##
## Idiosyncrasies
## --------------
##
## Currently we do **not** aim at 100% Markdown or RST compatibility in inline
## markup recognition rules because that would provide very little user value.
## This parser has 2 modes for inline markup:
##
## 1) Markdown-like mode which is enabled by `roPreferMarkdown` option
## (turned **on** by default).
##
## .. Note:: RST features like directives are still turned **on**
##
## 2) Compatibility mode which is RST rules.
##
## .. Note:: in both modes the parser interprets text between single
## backticks (code) identically:
## backslash does not escape; the only exception: ``\`` followed by `
## does escape so that we can always input a single backtick ` in
## inline code. However that makes it impossible to input code with
## ``\`` at the end in *single* backticks, one must use *double*
## backticks::
##
## `\` -- WRONG
## ``\`` -- GOOD
## So single backticks can always be input: `\`` will turn to ` code
##
## Limitations
## -----------
##
@@ -994,8 +1020,22 @@ proc expect(p: var RstParser, tok: string) =
if currentTok(p).symbol == tok: inc p.idx
else: rstMessage(p, meExpected, tok)
proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
proc inlineMarkdownEnd(p: RstParser): bool =
## Closing-side surroundings check used when `roPreferMarkdown` is set:
## true when the token before the current one is neither indentation
## nor whitespace.
result = prevTok(p).kind notin {tkIndent, tkWhite}
## (For a special case of ` we don't allow spaces surrounding it
## unlike original Markdown because this behavior is confusing/useless)
proc inlineRstEnd(p: RstParser): bool =
## Closing-side surroundings check following the RST inline markup
## recognition rules: true only when both Rule 2 and Rule 7 below hold.
# rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
# Rule 2:
# the previous token must not be indentation or whitespace
result = prevTok(p).kind notin {tkIndent, tkWhite}
if not result: return
# Rule 7:
# the next token must be indentation, whitespace or end of input, or its
# first character must be one of the punctuation characters listed below
result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
nextTok(p).symbol[0] in
{'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
if exact:
result = currentTok(p).symbol == markup
else:
@@ -1004,55 +1044,58 @@ proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
# check that escaping may have split `` to 2 tokens ` and `
result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`"
if not result: return
# Rule 2:
result = prevTok(p).kind notin {tkIndent, tkWhite}
if not result: return
# Rule 7:
result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
(roPreferMarkdown in p.s.options and
markup in ["``", "`"] and
nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or
nextTok(p).symbol[0] in
{'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
if not result: return
# Rule 4:
if p.idx > 0:
# see bug #17260; for now `\` must be written ``\``, likewise with sequences
# ending in an un-escaped `\`; `\\` is legal but not `\\\` for example;
# for this reason we can't use `["``", "`"]` here.
if markup != "``" and prevTok(p).symbol == "\\":
result = false
# surroundings check
if markup in ["_", "__"]:
result = inlineRstEnd(p)
else:
if roPreferMarkdown in p.s.options: result = inlineMarkdownEnd(p)
else: result = inlineRstEnd(p)
proc isInlineMarkupStart(p: RstParser, markup: string): bool =
# rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
var d: char
if markup != "_`":
result = currentTok(p).symbol == markup
else: # _` is a 2 token case
result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
proc rstRuleSurround(p: RstParser): bool =
## Returns false when the previous token starts with a quote or an opening
## bracket and the next token starts with the matching closing character;
## returns true otherwise (including when `p.idx` is 0).
result = true
# Rules 4 & 5:
if p.idx > 0:
# `d` is the closing character that pairs with `c`;
# '\0' means `c` is not one of the recognized opening characters
var d: char
var c = prevTok(p).symbol[0]
case c
of '\'', '\"': d = c
of '(': d = ')'
of '[': d = ']'
of '{': d = '}'
of '<': d = '>'
else: d = '\0'
if d != '\0': result = nextTok(p).symbol[0] != d
proc inlineMarkdownStart(p: RstParser): bool =
## Opening-side surroundings check used when `roPreferMarkdown` is set:
## the next token must not be indentation, whitespace or end of input,
## and `rstRuleSurround` must also accept the position.
result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
if not result: return
# Rule 6:
# this rst rule is really nice, let us use it in Markdown mode too.
result = rstRuleSurround(p)
proc inlineRstStart(p: RstParser): bool =
## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
# Rule 6
result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
(markup in ["``", "`"] and prevTok(p).kind in {tkIndent, tkWhite, tkWord}) or
prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
if not result: return
# Rule 1:
result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
if not result: return
# Rules 4 & 5:
if p.idx > 0:
if prevTok(p).symbol == "\\":
result = false
else:
var c = prevTok(p).symbol[0]
case c
of '\'', '\"': d = c
of '(': d = ')'
of '[': d = ']'
of '{': d = '}'
of '<': d = '>'
else: d = '\0'
if d != '\0': result = nextTok(p).symbol[0] != d
result = rstRuleSurround(p)
proc isInlineMarkupStart(p: RstParser, markup: string): bool =
## Checks whether the current token opens inline markup `markup`:
## first the token text must match, then the surrounding tokens are
## validated with either the RST or the Markdown start rules.
if markup != "_`":
result = currentTok(p).symbol == markup
else: # _` is a 2 token case
result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
if not result: return
# surroundings check
if markup in ["_", "__", "[", "|"]:
# these markups always use the RST start rules, whatever the options are
# Note: we require space/punctuation even before [markdown link](...)
result = inlineRstStart(p)
else:
# any other markup: Markdown rules if roPreferMarkdown is set, else RST rules
if roPreferMarkdown in p.s.options: result = inlineMarkdownStart(p)
else: result = inlineRstStart(p)
proc match(p: RstParser, start: int, expr: string): bool =
# regular expressions are:
@@ -1263,10 +1306,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) =
proc parseBackslash(p: var RstParser, father: PRstNode) =
assert(currentTok(p).kind == tkPunct)
if currentTok(p).symbol == "\\\\":
father.add newLeaf("\\")
inc p.idx
elif currentTok(p).symbol == "\\":
if currentTok(p).symbol == "\\":
# XXX: Unicode?
inc p.idx
if currentTok(p).kind != tkWhite: father.add(newLeaf(p))
@@ -1297,11 +1337,20 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
break
else:
if postfix == "`":
if prevTok(p).symbol == "\\" and currentTok(p).symbol == "`":
father.sons[^1] = newLeaf(p) # instead, we should use lookahead
if currentTok(p).symbol == "\\":
if nextTok(p).symbol == "\\":
father.add newLeaf("\\")
father.add newLeaf("\\")
inc p.idx, 2
elif nextTok(p).symbol == "`": # escape `
father.add newLeaf("`")
inc p.idx, 2
else:
father.add newLeaf("\\")
inc p.idx
else:
father.add(newLeaf(p))
inc p.idx
inc p.idx
else:
if interpretBackslash:
parseBackslash(p, father)

View File

@@ -7,7 +7,7 @@
#
## A set of helpers for the POSIX module.
## Raw interfaces are in the other posix*.nim files.
## Raw interfaces are in the other ``posix*.nim`` files.
# Where possible, contribute OS-independent procs in `os <os.html>`_ instead.

View File

@@ -23,7 +23,7 @@ import std/private/miscdollars
import os
proc toAst(input: string,
rstOptions: RstParseOptions = {roSupportMarkdown, roNimFile},
rstOptions: RstParseOptions = {roPreferMarkdown, roSupportMarkdown, roNimFile},
error: ref string = nil,
warnings: ref seq[string] = nil): string =
## If `error` is nil then no errors should be generated.
@@ -36,10 +36,11 @@ proc toAst(input: string,
toLocation(message, filename, line, col + ColRstOffset)
message.add " $1: $2" % [$mc, a]
if mc == mcError:
doAssert error != nil, "unexpected RST error '" & message & "'"
if error == nil:
raise newException(EParseError, "[unexpected error] " & message)
error[] = message
# we check only first error because subsequent ones may be meaningless
raise newException(EParseError, message)
raise newException(EParseError, "")
else:
doAssert warnings != nil, "unexpected RST warning '" & message & "'"
warnings[].add message
@@ -54,8 +55,9 @@ proc toAst(input: string,
var rst = rstParse(input, filen, line=LineRstInit, column=ColRstInit,
dummyHasToc, rstOptions, myFindFile, testMsgHandler)
result = renderRstToStr(rst)
except EParseError:
discard
except EParseError as e:
if e.msg != "":
result = e.msg
suite "RST parsing":
test "option list has priority over definition list":
@@ -326,6 +328,28 @@ suite "RST escaping":
""")
suite "RST inline markup":
test "* and ** surrounded by spaces are not inline markup":
check("a * b * c ** d ** e".toAst == dedent"""
rnInner
rnLeaf 'a'
rnLeaf ' '
rnLeaf '*'
rnLeaf ' '
rnLeaf 'b'
rnLeaf ' '
rnLeaf '*'
rnLeaf ' '
rnLeaf 'c'
rnLeaf ' '
rnLeaf '**'
rnLeaf ' '
rnLeaf 'd'
rnLeaf ' '
rnLeaf '**'
rnLeaf ' '
rnLeaf 'e'
""")
test "end-string has repeating symbols":
check("*emphasis content****".toAst == dedent"""
rnEmphasis
@@ -420,6 +444,37 @@ suite "RST inline markup":
rnLeaf 'proc `+`'
""")
check("""`\\`""".toAst ==
dedent"""
rnInlineCode
rnDirArg
rnLeaf 'nim'
[nil]
rnLiteralBlock
rnLeaf '\\'
""")
test "Markdown-style code/backtick":
# no whitespace is required before `
check("`try`...`except`".toAst ==
dedent"""
rnInner
rnInlineCode
rnDirArg
rnLeaf 'nim'
[nil]
rnLiteralBlock
rnLeaf 'try'
rnLeaf '...'
rnInlineCode
rnDirArg
rnLeaf 'nim'
[nil]
rnLiteralBlock
rnLeaf 'except'
""")
test """inline literals can contain \ anywhere""":
check("""``\``""".toAst == dedent"""
rnInlineLiteral

View File

@@ -10,7 +10,7 @@ import unittest, strutils, strtabs
import std/private/miscdollars
proc toHtml(input: string,
rstOptions: RstParseOptions = {roSupportMarkdown, roNimFile},
rstOptions: RstParseOptions = {roPreferMarkdown, roSupportMarkdown, roNimFile},
error: ref string = nil,
warnings: ref seq[string] = nil): string =
## If `error` is nil then no errors should be generated.
@@ -23,18 +23,20 @@ proc toHtml(input: string,
toLocation(message, filename, line, col + ColRstOffset)
message.add " $1: $2" % [$mc, a]
if mc == mcError:
doAssert error != nil, "unexpected RST error '" & message & "'"
if error == nil:
raise newException(EParseError, "[unexpected error] " & message)
error[] = message
# we check only first error because subsequent ones may be meaningless
raise newException(EParseError, message)
raise newException(EParseError, "")
else:
doAssert warnings != nil, "unexpected RST warning '" & message & "'"
warnings[].add message
try:
result = rstToHtml(input, rstOptions, defaultConfig(),
msgHandler=testMsgHandler)
except EParseError:
discard
except EParseError as e:
if e.msg != "":
result = e.msg
# inline code tags (for parsing originated from highlite.nim)
proc id(str: string): string = """<span class="Identifier">""" & str & "</span>"