docs: make inline markup more compatible with Markdown (#18053)

fixes https://github.com/timotheecour/Nim/issues/739
2026-06-05 03:14:08 +00:00 · 2021-05-21 07:54:20 +03:00
parent 6a5973882b
commit 9f7e2e3057
6 changed files with 168 additions and 62 deletions
--- a/doc/regexprs.txt
+++ b/doc/regexprs.txt
@@ -146,7 +146,7 @@ character          meaning
 After ``\x``, from zero to two hexadecimal digits are read (letters can be in
 upper or lower case). In UTF-8 mode, any number of hexadecimal digits may
 appear between ``\x{`` and ``}``, but the value of the character code must be
-less than 2**31 (that is, the maximum hexadecimal value is 7FFFFFFF). If
+less than 2^31 (that is, the maximum hexadecimal value is 7FFFFFFF). If
 characters other than hexadecimal digits appear between ``\x{`` and ``}``, or
 if there is no terminating ``}``, this form of escape is not recognized.
 Instead, the initial ``\x`` will be interpreted as a basic hexadecimal escape,
--- a/lib/impure/db_sqlite.nim
+++ b/lib/impure/db_sqlite.nim
@@ -152,7 +152,7 @@
 ## Instead, a `seq[string]` is returned for each row.
 ##
 ## The reasoning is as follows:
-## 1. it's close to what many DBs offer natively (char**)
+## 1. it's close to what many DBs offer natively (`char**`:c:)
 ## 2. it hides the number of types that the DB supports
 ##    (int? int64? decimal up to 10 places? geo coords?)
 ## 3. it's convenient when all you do is to forward the data to somewhere else (echo, log, put the data into a new query)
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -130,6 +130,32 @@
 ## .. warning:: Using Nim-specific features can cause other RST implementations
 ##   to fail on your document.
 ##
+## Idiosyncrasies
+## --------------
+##
+## Currently we do **not** aim at 100% Markdown or RST compatibility in inline
+## markup recognition rules because that would provide very little user value.
+## This parser has 2 modes for inline markup:
+##
+## 1) Markdown-like mode which is enabled by `roPreferMarkdown` option
+##    (turned **on** by default).
+##
+##    .. Note:: RST features like directives are still turned **on**
+##
+## 2) Compatibility mode which is RST rules.
+##
+## .. Note:: in both modes the parser interpretes text between single
+##    backticks (code) identically:
+##      backslash does not escape; the only exception: ``\`` folowed by `
+##      does escape so that we can always input a single backtick ` in
+##      inline code. However that makes impossible to input code with
+##      ``\`` at the end in *single* backticks, one must use *double*
+##      backticks::
+##
+##        `\`   -- WRONG
+##        ``\`` -- GOOD
+##        So single backticks can always be input: `\`` will turn to ` code
+##
 ## Limitations
 ## -----------
 ##
@@ -994,8 +1020,22 @@ proc expect(p: var RstParser, tok: string) =
  if currentTok(p).symbol == tok: inc p.idx
  else: rstMessage(p, meExpected, tok)

-proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
+proc inlineMarkdownEnd(p: RstParser): bool =
+  result = prevTok(p).kind notin {tkIndent, tkWhite}
+  ## (For a special case of ` we don't allow spaces surrounding it
+  ## unlike original Markdown because this behavior confusing/useless)
+
+proc inlineRstEnd(p: RstParser): bool =
  # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
+  # Rule 2:
+  result = prevTok(p).kind notin {tkIndent, tkWhite}
+  if not result: return
+  # Rule 7:
+  result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
+      nextTok(p).symbol[0] in
+      {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
+
+proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
  if exact:
    result = currentTok(p).symbol == markup
  else:
@@ -1004,55 +1044,58 @@ proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
      # check that escaping may have splitted `` to 2 tokens ` and `
      result = currentTok(p).symbol == "`" and prevTok(p).symbol == "`"
  if not result: return
-  # Rule 2:
-  result = prevTok(p).kind notin {tkIndent, tkWhite}
-  if not result: return
-  # Rule 7:
-  result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
-      (roPreferMarkdown in p.s.options and
-        markup in ["``", "`"] and
-        nextTok(p).kind in {tkIndent, tkWhite, tkWord, tkEof}) or
-      nextTok(p).symbol[0] in
-      {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':', '.', ',', ';', '!', '?', '_'}
-  if not result: return
-  # Rule 4:
-  if p.idx > 0:
-    # see bug #17260; for now `\` must be written ``\``, likewise with sequences
-    # ending in an un-escaped `\`; `\\` is legal but not `\\\` for example;
-    # for this reason we can't use `["``", "`"]` here.
-    if markup != "``" and prevTok(p).symbol == "\\":
-      result = false
+  # surroundings check
+  if markup in ["_", "__"]:
+    result = inlineRstEnd(p)
+  else:
+    if roPreferMarkdown in p.s.options: result = inlineMarkdownEnd(p)
+    else: result = inlineRstEnd(p)

-proc isInlineMarkupStart(p: RstParser, markup: string): bool =
-  # rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
-  var d: char
-  if markup != "_`":
-    result = currentTok(p).symbol == markup
-  else:  # _` is a 2 token case
-    result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
+proc rstRuleSurround(p: RstParser): bool =
+  result = true
+  # Rules 4 & 5:
+  if p.idx > 0:
+    var d: char
+    var c = prevTok(p).symbol[0]
+    case c
+    of '\'', '\"': d = c
+    of '(': d = ')'
+    of '[': d = ']'
+    of '{': d = '}'
+    of '<': d = '>'
+    else: d = '\0'
+    if d != '\0': result = nextTok(p).symbol[0] != d
+
+proc inlineMarkdownStart(p: RstParser): bool =
+  result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
  if not result: return
-  # Rule 6:
+  # this rst rule is really nice, let us use it in Markdown mode too.
+  result = rstRuleSurround(p)
+
+proc inlineRstStart(p: RstParser): bool =
+  ## rst rules: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
+  # Rule 6
  result = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
-      (markup in ["``", "`"] and prevTok(p).kind in {tkIndent, tkWhite, tkWord}) or
      prevTok(p).symbol[0] in {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
  if not result: return
  # Rule 1:
  result = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
  if not result: return
-  # Rules 4 & 5:
-  if p.idx > 0:
-    if prevTok(p).symbol == "\\":
-      result = false
-    else:
-      var c = prevTok(p).symbol[0]
-      case c
-      of '\'', '\"': d = c
-      of '(': d = ')'
-      of '[': d = ']'
-      of '{': d = '}'
-      of '<': d = '>'
-      else: d = '\0'
-      if d != '\0': result = nextTok(p).symbol[0] != d
+  result = rstRuleSurround(p)
+
+proc isInlineMarkupStart(p: RstParser, markup: string): bool =
+  if markup != "_`":
+    result = currentTok(p).symbol == markup
+  else:  # _` is a 2 token case
+    result = currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
+  if not result: return
+  # surroundings check
+  if markup in ["_", "__", "[", "|"]:
+    # Note: we require space/punctuation even before [markdown link](...)
+    result = inlineRstStart(p)
+  else:
+    if roPreferMarkdown in p.s.options: result = inlineMarkdownStart(p)
+    else: result = inlineRstStart(p)

 proc match(p: RstParser, start: int, expr: string): bool =
  # regular expressions are:
@@ -1263,10 +1306,7 @@ proc parseWordOrRef(p: var RstParser, father: PRstNode) =

 proc parseBackslash(p: var RstParser, father: PRstNode) =
  assert(currentTok(p).kind == tkPunct)
-  if currentTok(p).symbol == "\\\\":
-    father.add newLeaf("\\")
-    inc p.idx
-  elif currentTok(p).symbol == "\\":
+  if currentTok(p).symbol == "\\":
    # XXX: Unicode?
    inc p.idx
    if currentTok(p).kind != tkWhite: father.add(newLeaf(p))
@@ -1297,11 +1337,20 @@ proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
        break
      else:
        if postfix == "`":
-          if prevTok(p).symbol == "\\" and currentTok(p).symbol == "`":
-            father.sons[^1] = newLeaf(p) # instead, we should use lookahead
+          if currentTok(p).symbol == "\\":
+            if nextTok(p).symbol == "\\":
+              father.add newLeaf("\\")
+              father.add newLeaf("\\")
+              inc p.idx, 2
+            elif nextTok(p).symbol == "`":  # escape `
+              father.add newLeaf("`")
+              inc p.idx, 2
+            else:
+              father.add newLeaf("\\")
+              inc p.idx
          else:
            father.add(newLeaf(p))
-          inc p.idx
+            inc p.idx
        else:
          if interpretBackslash:
            parseBackslash(p, father)
--- a/lib/posix/posix_utils.nim
+++ b/lib/posix/posix_utils.nim
@@ -7,7 +7,7 @@
 #

 ## A set of helpers for the POSIX module.
-## Raw interfaces are in the other posix*.nim files.
+## Raw interfaces are in the other ``posix*.nim`` files.

 # Where possible, contribute OS-independent procs in `os <os.html>`_ instead.

--- a/tests/stdlib/trst.nim
+++ b/tests/stdlib/trst.nim
@@ -23,7 +23,7 @@ import std/private/miscdollars
 import os

 proc toAst(input: string,
-            rstOptions: RstParseOptions = {roSupportMarkdown, roNimFile},
+            rstOptions: RstParseOptions = {roPreferMarkdown, roSupportMarkdown, roNimFile},
            error: ref string = nil,
            warnings: ref seq[string] = nil): string =
  ## If `error` is nil then no errors should be generated.
@@ -36,10 +36,11 @@ proc toAst(input: string,
    toLocation(message, filename, line, col + ColRstOffset)
    message.add " $1: $2" % [$mc, a]
    if mc == mcError:
-      doAssert error != nil, "unexpected RST error '" & message & "'"
+      if error == nil:
+        raise newException(EParseError, "[unexpected error] " & message)
      error[] = message
      # we check only first error because subsequent ones may be meaningless
-      raise newException(EParseError, message)
+      raise newException(EParseError, "")
    else:
      doAssert warnings != nil, "unexpected RST warning '" & message & "'"
      warnings[].add message
@@ -54,8 +55,9 @@ proc toAst(input: string,
    var rst = rstParse(input, filen, line=LineRstInit, column=ColRstInit,
                       dummyHasToc, rstOptions, myFindFile, testMsgHandler)
    result = renderRstToStr(rst)
-  except EParseError:
-    discard
+  except EParseError as e:
+    if e.msg != "":
+      result = e.msg

 suite "RST parsing":
  test "option list has priority over definition list":
@@ -326,6 +328,28 @@ suite "RST escaping":
      """)

 suite "RST inline markup":
+  test "* and ** surrounded by spaces are not inline markup":
+    check("a * b * c ** d ** e".toAst == dedent"""
+      rnInner
+        rnLeaf  'a'
+        rnLeaf  ' '
+        rnLeaf  '*'
+        rnLeaf  ' '
+        rnLeaf  'b'
+        rnLeaf  ' '
+        rnLeaf  '*'
+        rnLeaf  ' '
+        rnLeaf  'c'
+        rnLeaf  ' '
+        rnLeaf  '**'
+        rnLeaf  ' '
+        rnLeaf  'd'
+        rnLeaf  ' '
+        rnLeaf  '**'
+        rnLeaf  ' '
+        rnLeaf  'e'
+      """)
+
  test "end-string has repeating symbols":
    check("*emphasis content****".toAst == dedent"""
      rnEmphasis
@@ -420,6 +444,37 @@ suite "RST inline markup":
          rnLeaf  'proc `+`'
      """)

+    check("""`\\`""".toAst ==
+      dedent"""
+        rnInlineCode
+          rnDirArg
+            rnLeaf  'nim'
+          [nil]
+          rnLiteralBlock
+            rnLeaf  '\\'
+        """)
+
+  test "Markdown-style code/backtick":
+    # no whitespace is required before `
+    check("`try`...`except`".toAst ==
+      dedent"""
+        rnInner
+          rnInlineCode
+            rnDirArg
+              rnLeaf  'nim'
+            [nil]
+            rnLiteralBlock
+              rnLeaf  'try'
+          rnLeaf  '...'
+          rnInlineCode
+            rnDirArg
+              rnLeaf  'nim'
+            [nil]
+            rnLiteralBlock
+              rnLeaf  'except'
+        """)
+
+
  test """inline literals can contain \ anywhere""":
    check("""``\``""".toAst == dedent"""
      rnInlineLiteral
--- a/tests/stdlib/trstgen.nim
+++ b/tests/stdlib/trstgen.nim
@@ -10,7 +10,7 @@ import unittest, strutils, strtabs
 import std/private/miscdollars

 proc toHtml(input: string,
-            rstOptions: RstParseOptions = {roSupportMarkdown, roNimFile},
+            rstOptions: RstParseOptions = {roPreferMarkdown, roSupportMarkdown, roNimFile},
            error: ref string = nil,
            warnings: ref seq[string] = nil): string =
  ## If `error` is nil then no errors should be generated.
@@ -23,18 +23,20 @@ proc toHtml(input: string,
    toLocation(message, filename, line, col + ColRstOffset)
    message.add " $1: $2" % [$mc, a]
    if mc == mcError:
-      doAssert error != nil, "unexpected RST error '" & message & "'"
+      if error == nil:
+        raise newException(EParseError, "[unexpected error] " & message)
      error[] = message
      # we check only first error because subsequent ones may be meaningless
-      raise newException(EParseError, message)
+      raise newException(EParseError, "")
    else:
      doAssert warnings != nil, "unexpected RST warning '" & message & "'"
      warnings[].add message
  try:
    result = rstToHtml(input, rstOptions, defaultConfig(),
                       msgHandler=testMsgHandler)
-  except EParseError:
-    discard
+  except EParseError as e:
+    if e.msg != "":
+      result = e.msg

 # inline code tags (for parsing originated from highlite.nim)
 proc id(str: string): string = """<span class="Identifier">"""  & str & "</span>"