mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-19 05:50:30 +00:00
docs: make inline markup more compatible with Markdown (#18053)
fixes https://github.com/timotheecour/Nim/issues/739
This commit is contained in:
@@ -146,7 +146,7 @@ character meaning
|
||||
After ``\x``, from zero to two hexadecimal digits are read (letters can be in
|
||||
upper or lower case). In UTF-8 mode, any number of hexadecimal digits may
|
||||
appear between ``\x{`` and ``}``, but the value of the character code must be
|
||||
less than 2**31 (that is, the maximum hexadecimal value is 7FFFFFFF). If
|
||||
less than 2^31 (that is, the maximum hexadecimal value is 7FFFFFFF). If
|
||||
characters other than hexadecimal digits appear between ``\x{`` and ``}``, or
|
||||
if there is no terminating ``}``, this form of escape is not recognized.
|
||||
Instead, the initial ``\x`` will be interpreted as a basic hexadecimal escape,
|
||||
|
||||
@@ -152,7 +152,7 @@
|
||||
## Instead, a `seq[string]` is returned for each row.
|
||||
##
|
||||
## The reasoning is as follows:
|
||||
## 1. it's close to what many DBs offer natively (char**)
|
||||
## 1. it's close to what many DBs offer natively (`char**`:c:)
|
||||
## 2. it hides the number of types that the DB supports
|
||||
## (int? int64? decimal up to 10 places? geo coords?)
|
||||
## 3. it's convenient when all you do is to forward the data to somewhere else (echo, log, put the data into a new query)
|
||||
|
||||
@@ -130,6 +130,32 @@
|
||||
## .. warning:: Using Nim-specific features can cause other RST implementations
|
||||
## to fail on your document.
|
||||
##
|
||||
## Idiosyncrasies
|
||||
## --------------
|
||||
##
|
||||
## Currently we do **not** aim at 100% Markdown or RST compatibility in inline
|
||||
## markup recognition rules because that would provide very little user value.
|
||||
## This parser has 2 modes for inline markup:
|
||||
##
|
||||
## 1) Markdown-like mode which is enabled by `roPreferMarkdown` option
|
||||
## (turned **on** by default).
|
||||
##
|
||||
## .. Note:: RST features like directives are still turned **on**
|
||||
##
|
||||
## 2) Compatibility mode which is RST rules.
|
||||
##
|
||||
## .. Note:: in both modes the parser interprets text between single
|
||||
## backticks (code) identically:
|
||||
## backslash does not escape; the only exception: ``\`` followed by `
|
||||
## does escape so that we can always input a single backtick ` in
|
||||
## inline code. However that makes it impossible to input code with
|
||||
## ``\`` at the end in *single* backticks, one must use *double*
|
||||
## backticks::
|
||||
##
|
||||
## `\` -- WRONG
|
||||
## ``\`` -- GOOD
|
||||
## So single backticks can always be input: `\`` will turn to ` code
|
||||
##
|
||||
## Limitations
|
||||
## -----------
|
||||
##
|
||||
@@ -994,8 +1020,22 @@ proc expect(p: var RstParser, tok: string) =
|
||||
if currentTok(p).symbol == tok: inc p.idx
|
||||
else: rstMessage(p, meExpected, tok)
|
||||
|
||||
proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
|
||||
proc inlineMarkdownEnd(p: RstParser): bool =
  ## Markdown-mode check for the end of inline markup: succeeds when the
  ## token right before the current one is neither indentation nor
  ## whitespace.
  ##
  ## (For the special case of ` we don't allow spaces surrounding it,
  ## unlike original Markdown, because that behavior is confusing/useless.)
  let before = prevTok(p).kind
  result = before != tkIndent and before != tkWhite
|
||||
|
||||
proc inlineRstEnd(p: RstParser): bool =
  ## RST-mode check for the end of inline markup, following the rst rules:
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
  const allowedAfter =
    {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
  if prevTok(p).kind in {tkIndent, tkWhite}:
    # Rule 2: the previous token must not be indentation/whitespace.
    result = false
  elif nextTok(p).kind in {tkIndent, tkWhite, tkEof}:
    # Rule 7 (first half): followed by indentation, whitespace or EOF.
    result = true
  else:
    # Rule 7 (second half): otherwise the next token has to begin with
    # one of the accepted punctuation characters.
    result = nextTok(p).symbol[0] in allowedAfter
|
||||
|
||||
proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
|
||||
if exact:
|
||||
result = currentTok(p).symbol == markup
|
||||
else:
|
||||
@@ -1004,55 +1044,58 @@ proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
|
||||
# check whether escaping may have split `` into 2 tokens ` and `
|
||||
result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`"
|
||||
if not result: return
|
||||
# Rule 2:
|
||||
result = prevTok(p).kind notin {tkIndent, tkWhite}
|
||||
if not result: return
|
||||
# Rule 7:
|
||||
result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
|
||||
(roPreferMarkdown in p.s.options and
|
||||
markup in ["``", "`"] and
|
||||
nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or
|
||||
nextTok(p).symbol[0] in
|
||||
{'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
|
||||
if not result: return
|
||||
# Rule 4:
|
||||
if p.idx > 0:
|
||||
# see bug #17260; for now `\` must be written ``\``, likewise with sequences
|
||||
# ending in an un-escaped `\`; `\\` is legal but not `\\\` for example;
|
||||
# for this reason we can't use `["``", "`"]` here.
|
||||
if markup != "``" and prevTok(p).symbol == "\\":
|
||||
result = false
|
||||
# surroundings check
|
||||
if markup in ["_", "__"]:
|
||||
result = inlineRstEnd(p)
|
||||
else:
|
||||
if roPreferMarkdown in p.s.options: result = inlineMarkdownEnd(p)
|
||||
else: result = inlineRstEnd(p)
|
||||
|
||||
proc isInlineMarkupStart(p: RstParser, markup: string): bool =
|
||||
# rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
|
||||
var d: char
|
||||
if markup != "_`":
|
||||
result = currentTok(p).symbol == markup
|
||||
else: # _` is a 2 token case
|
||||
result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
|
||||
proc rstRuleSurround(p: RstParser): bool =
  ## Rules 4 & 5: when the previous token starts with a quote or an opening
  ## bracket, the next token must not start with the matching closing
  ## character. Always true at the very first token (`p.idx == 0`) and
  ## when the previous token starts with any other character.
  if p.idx <= 0:
    result = true
  else:
    let opening = prevTok(p).symbol[0]
    # '\0' is the marker for "no matching closing character".
    let closing =
      case opening
      of '\'', '\"': opening
      of '(': ')'
      of '[': ']'
      of '{': '}'
      of '<': '>'
      else: '\0'
    # The next token is inspected only when a closing character exists.
    result = closing == '\0' or nextTok(p).symbol[0] != closing
|
||||
|
||||
proc inlineMarkdownStart(p: RstParser): bool =
  ## Markdown-mode check for the start of inline markup: the next token
  ## must not be indentation, whitespace or EOF, and the surroundings must
  ## pass `rstRuleSurround`.
  if nextTok(p).kind in {tkIndent, tkWhite, tkEof}:
    result = false
  else:
    # This rst rule is really nice, so it is applied in Markdown mode too.
    result = rstRuleSurround(p)
|
||||
|
||||
proc inlineRstStart(p: RstParser): bool =
|
||||
## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
|
||||
# Rule 6
|
||||
result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
|
||||
(markup in ["``", "`"] and prevTok(p).kind in {tkIndent, tkWhite, tkWord}) or
|
||||
prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
|
||||
if not result: return
|
||||
# Rule 1:
|
||||
result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
|
||||
if not result: return
|
||||
# Rules 4 & 5:
|
||||
if p.idx > 0:
|
||||
if prevTok(p).symbol == "\\":
|
||||
result = false
|
||||
else:
|
||||
var c = prevTok(p).symbol[0]
|
||||
case c
|
||||
of '\'', '\"': d = c
|
||||
of '(': d = ')'
|
||||
of '[': d = ']'
|
||||
of '{': d = '}'
|
||||
of '<': d = '>'
|
||||
else: d = '\0'
|
||||
if d != '\0': result = nextTok(p).symbol[0] != d
|
||||
result = rstRuleSurround(p)
|
||||
|
||||
proc isInlineMarkupStart(p: RstParser, markup: string): bool =
  ## Tells whether inline markup `markup` starts at the current token:
  ## first the token text has to match, then the surroundings are checked
  ## with either the RST or the Markdown start rules.
  let tokensMatch =
    if markup == "_`":  # _` is a 2 token case
      currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
    else:
      currentTok(p).symbol == markup
  if not tokensMatch:
    result = false
  elif markup in ["_", "__", "[", "|"]:
    # Note: we require space/punctuation even before [markdown link](...)
    result = inlineRstStart(p)
  elif roPreferMarkdown in p.s.options:
    result = inlineMarkdownStart(p)
  else:
    result = inlineRstStart(p)
|
||||
|
||||
proc match(p: RstParser, start: int, expr: string): bool =
|
||||
# regular expressions are:
|
||||
@@ -1263,10 +1306,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) =
|
||||
|
||||
proc parseBackslash(p: var RstParser, father: PRstNode) =
|
||||
assert(currentTok(p).kind == tkPunct)
|
||||
if currentTok(p).symbol == "\\\\":
|
||||
father.add newLeaf("\\")
|
||||
inc p.idx
|
||||
elif currentTok(p).symbol == "\\":
|
||||
if currentTok(p).symbol == "\\":
|
||||
# XXX: Unicode?
|
||||
inc p.idx
|
||||
if currentTok(p).kind != tkWhite: father.add(newLeaf(p))
|
||||
@@ -1297,11 +1337,20 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
|
||||
break
|
||||
else:
|
||||
if postfix == "`":
|
||||
if prevTok(p).symbol == "\\" and currentTok(p).symbol == "`":
|
||||
father.sons[^1] = newLeaf(p) # instead, we should use lookahead
|
||||
if currentTok(p).symbol == "\\":
|
||||
if nextTok(p).symbol == "\\":
|
||||
father.add newLeaf("\\")
|
||||
father.add newLeaf("\\")
|
||||
inc p.idx, 2
|
||||
elif nextTok(p).symbol == "`": # escape `
|
||||
father.add newLeaf("`")
|
||||
inc p.idx, 2
|
||||
else:
|
||||
father.add newLeaf("\\")
|
||||
inc p.idx
|
||||
else:
|
||||
father.add(newLeaf(p))
|
||||
inc p.idx
|
||||
inc p.idx
|
||||
else:
|
||||
if interpretBackslash:
|
||||
parseBackslash(p, father)
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#
|
||||
|
||||
## A set of helpers for the POSIX module.
|
||||
## Raw interfaces are in the other posix*.nim files.
|
||||
## Raw interfaces are in the other ``posix*.nim`` files.
|
||||
|
||||
# Where possible, contribute OS-independent procs in `os <os.html>`_ instead.
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ import std/private/miscdollars
|
||||
import os
|
||||
|
||||
proc toAst(input: string,
|
||||
rstOptions: RstParseOptions = {roSupportMarkdown, roNimFile},
|
||||
rstOptions: RstParseOptions = {roPreferMarkdown, roSupportMarkdown, roNimFile},
|
||||
error: ref string = nil,
|
||||
warnings: ref seq[string] = nil): string =
|
||||
## If `error` is nil then no errors should be generated.
|
||||
@@ -36,10 +36,11 @@ proc toAst(input: string,
|
||||
toLocation(message, filename, line, col + ColRstOffset)
|
||||
message.add " $1: $2" % [$mc, a]
|
||||
if mc == mcError:
|
||||
doAssert error != nil, "unexpected RST error '" & message & "'"
|
||||
if error == nil:
|
||||
raise newException(EParseError, "[unexpected error] " & message)
|
||||
error[] = message
|
||||
# we check only first error because subsequent ones may be meaningless
|
||||
raise newException(EParseError, message)
|
||||
raise newException(EParseError, "")
|
||||
else:
|
||||
doAssert warnings != nil, "unexpected RST warning '" & message & "'"
|
||||
warnings[].add message
|
||||
@@ -54,8 +55,9 @@ proc toAst(input: string,
|
||||
var rst = rstParse(input, filen, line=LineRstInit, column=ColRstInit,
|
||||
dummyHasToc, rstOptions, myFindFile, testMsgHandler)
|
||||
result = renderRstToStr(rst)
|
||||
except EParseError:
|
||||
discard
|
||||
except EParseError as e:
|
||||
if e.msg != "":
|
||||
result = e.msg
|
||||
|
||||
suite "RST parsing":
|
||||
test "option list has priority over definition list":
|
||||
@@ -326,6 +328,28 @@ suite "RST escaping":
|
||||
""")
|
||||
|
||||
suite "RST inline markup":
|
||||
test "* and ** surrounded by spaces are not inline markup":
|
||||
check("a * b * c ** d ** e".toAst == dedent"""
|
||||
rnInner
|
||||
rnLeaf 'a'
|
||||
rnLeaf ' '
|
||||
rnLeaf '*'
|
||||
rnLeaf ' '
|
||||
rnLeaf 'b'
|
||||
rnLeaf ' '
|
||||
rnLeaf '*'
|
||||
rnLeaf ' '
|
||||
rnLeaf 'c'
|
||||
rnLeaf ' '
|
||||
rnLeaf '**'
|
||||
rnLeaf ' '
|
||||
rnLeaf 'd'
|
||||
rnLeaf ' '
|
||||
rnLeaf '**'
|
||||
rnLeaf ' '
|
||||
rnLeaf 'e'
|
||||
""")
|
||||
|
||||
test "end-string has repeating symbols":
|
||||
check("*emphasis content****".toAst == dedent"""
|
||||
rnEmphasis
|
||||
@@ -420,6 +444,37 @@ suite "RST inline markup":
|
||||
rnLeaf 'proc `+`'
|
||||
""")
|
||||
|
||||
check("""`\\`""".toAst ==
|
||||
dedent"""
|
||||
rnInlineCode
|
||||
rnDirArg
|
||||
rnLeaf 'nim'
|
||||
[nil]
|
||||
rnLiteralBlock
|
||||
rnLeaf '\\'
|
||||
""")
|
||||
|
||||
test "Markdown-style code/backtick":
|
||||
# no whitespace is required before `
|
||||
check("`try`...`except`".toAst ==
|
||||
dedent"""
|
||||
rnInner
|
||||
rnInlineCode
|
||||
rnDirArg
|
||||
rnLeaf 'nim'
|
||||
[nil]
|
||||
rnLiteralBlock
|
||||
rnLeaf 'try'
|
||||
rnLeaf '...'
|
||||
rnInlineCode
|
||||
rnDirArg
|
||||
rnLeaf 'nim'
|
||||
[nil]
|
||||
rnLiteralBlock
|
||||
rnLeaf 'except'
|
||||
""")
|
||||
|
||||
|
||||
test """inline literals can contain \ anywhere""":
|
||||
check("""``\``""".toAst == dedent"""
|
||||
rnInlineLiteral
|
||||
|
||||
@@ -10,7 +10,7 @@ import unittest, strutils, strtabs
|
||||
import std/private/miscdollars
|
||||
|
||||
proc toHtml(input: string,
|
||||
rstOptions: RstParseOptions = {roSupportMarkdown, roNimFile},
|
||||
rstOptions: RstParseOptions = {roPreferMarkdown, roSupportMarkdown, roNimFile},
|
||||
error: ref string = nil,
|
||||
warnings: ref seq[string] = nil): string =
|
||||
## If `error` is nil then no errors should be generated.
|
||||
@@ -23,18 +23,20 @@ proc toHtml(input: string,
|
||||
toLocation(message, filename, line, col + ColRstOffset)
|
||||
message.add " $1: $2" % [$mc, a]
|
||||
if mc == mcError:
|
||||
doAssert error != nil, "unexpected RST error '" & message & "'"
|
||||
if error == nil:
|
||||
raise newException(EParseError, "[unexpected error] " & message)
|
||||
error[] = message
|
||||
# we check only first error because subsequent ones may be meaningless
|
||||
raise newException(EParseError, message)
|
||||
raise newException(EParseError, "")
|
||||
else:
|
||||
doAssert warnings != nil, "unexpected RST warning '" & message & "'"
|
||||
warnings[].add message
|
||||
try:
|
||||
result = rstToHtml(input, rstOptions, defaultConfig(),
|
||||
msgHandler=testMsgHandler)
|
||||
except EParseError:
|
||||
discard
|
||||
except EParseError as e:
|
||||
if e.msg != "":
|
||||
result = e.msg
|
||||
|
||||
# inline code tags (for parsing originated from highlite.nim)
|
||||
proc id(str: string): string = """<span class="Identifier">""" & str & "</span>"
|
||||
|
||||
Reference in New Issue
Block a user