doc/rst2html: a few fixes for enumerated and bullet lists (#16295)

* fix bullet/enumerated lists with many blank lines * fix enumerated list parsing * fix parse failure when next line after list empty * implement arbitrary start of enumerator * check that enumerators are in order * remove redundant start=x if x=1 or a * add some doc on implemented features * update start in rst_examples.rst * allow upper-case letters + more docs
2026-02-12 06:18:51 +00:00 · 2020-12-14 20:10:39 +03:00
parent 2728711dd3
commit e843492b13
6 changed files with 309 additions and 37 deletions
--- a/config/nimdoc.tex.cfg
+++ b/config/nimdoc.tex.cfg
@@ -50,6 +50,7 @@ doc.file = """
 \usepackage{fancyvrb, courier}
 \usepackage{tabularx}
 \usepackage{hyperref}
+\usepackage{enumitem}

 \begin{document}
 \title{$title $version}
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -11,6 +11,59 @@
 ## subset is implemented. Some features of the `markdown`:idx: wiki syntax are
 ## also supported.
 ##
+## Supported RST features:
+##
+## * body elements
+##   + sections
+##   + transitions
+##   + paragraphs
+##   + bullet lists using \+, \*, \-
+##   + enumerated lists using arabic numerals or alphabet
+##     characters:  1. ... 2. ... *or* a. ... b. ... *or* A. ... B. ...
+##   + definition lists
+##   + field lists
+##   + option lists
+##   + indented literal blocks
+##   + simple tables
+##   + directives
+##     - image, figure
+##     - code-block
+##     - substitution definitions: replace and image
+##     - ... a few more
+##   + comments
+## * inline markup
+##   + *emphasis*, **strong emphasis**, `interpreted text`,
+##     ``inline literals``, hyperlink references, substitution references,
+##     standalone hyperlinks
+##
+## Additional features:
+##
+## * ***triple emphasis*** (bold and italic) using \*\*\*
+##
+## Optional additional features, turned on by ``options: RstParseOption`` in
+## `rstParse proc <#rstParse,string,string,int,int,bool,RstParseOptions,FindFileHandler,MsgHandler>`_:
+##
+## * emoji / smiley symbols
+## * markdown tables
+## * markdown code blocks
+## * markdown links
+## * markdown headlines
+##
+## Limitations:
+##
+## * no Unicode support in character width calculations
+## * body elements
+##   - no roman numerals in enumerated lists
+##   - no quoted literal blocks
+##   - no doctest blocks
+##   - no grid tables
+##   - directives: no support for admonitions (notes, caution)
+##   - no footnotes & citations support
+##   - no inline internal targets
+## * inline markup
+##   - no simple-inline-markup
+##   - no embedded URI and aliases
+##
 ## **Note:** Import ``packages/docutils/rst`` to use this module

 import
@@ -569,7 +622,9 @@ proc match(p: RstParser, start: int, expr: string): bool =
  # 'p'              tkPunct
  # 'T'              always true
  # 'E'              whitespace, indent or eof
-  # 'e'              tkWord or '#' (for enumeration lists)
+  # 'e'              any enumeration sequence or '#' (for enumeration lists)
+  # 'x'              a..z, A..Z or '#' (for enumeration lists)
+  # 'n'              0..9 or '#' (for enumeration lists)
  var i = 0
  var j = start
  var last = expr.len - 1
@@ -583,12 +638,16 @@ proc match(p: RstParser, start: int, expr: string): bool =
    of 'o': result = p.tok[j].kind == tkOther
    of 'T': result = true
    of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent}
-    of 'e':
+    of 'e', 'x', 'n':
      result = p.tok[j].kind == tkWord or p.tok[j].symbol == "#"
      if result:
        case p.tok[j].symbol[0]
-        of 'a'..'z', 'A'..'Z', '#': result = p.tok[j].symbol.len == 1
-        of '0'..'9': result = allCharsInSet(p.tok[j].symbol, {'0'..'9'})
+        of '#': result = true
+        of 'a'..'z', 'A'..'Z':
+          result = expr[i] in {'e', 'x'} and p.tok[j].symbol.len == 1
+        of '0'..'9':
+          result = expr[i] in {'e', 'n'} and
+                     allCharsInSet(p.tok[j].symbol, {'0'..'9'})
        else: result = false
    else:
      var c = expr[i]
@@ -1465,33 +1524,55 @@ proc parseDefinitionList(p: var RstParser): PRstNode =

 proc parseEnumList(p: var RstParser): PRstNode =
  const
-    wildcards: array[0..2, string] = ["(e) ", "e) ", "e. "]
-    wildpos: array[0..2, int] = [1, 0, 0]
-  result = nil
+    wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ",
+                                      "(x) ", "x) ", "x. "]
+      # enumerator patterns, where 'x' means letter and 'n' means number
+    wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3]  # number of tokens
+    wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
+      # position of enumeration sequence (number/letter) in enumerator
+  result = newRstNode(rnEnumList)
+  let col = currentTok(p).col
  var w = 0
-  while w <= 2:
+  while w < wildcards.len:
    if match(p, p.idx, wildcards[w]): break
    inc w
-  if w <= 2:
-    var col = currentTok(p).col
-    result = newRstNode(rnEnumList)
-    inc p.idx, wildpos[w] + 3
-    var j = tokenAfterNewline(p)
-    if p.tok[j].col == currentTok(p).col or match(p, j, wildcards[w]):
-      pushInd(p, currentTok(p).col)
-      while true:
-        var item = newRstNode(rnEnumItem)
-        parseSection(p, item)
-        result.add(item)
-        if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
-            match(p, p.idx + 1, wildcards[w]):
-          inc p.idx, wildpos[w] + 4
-        else:
-          break
-      popInd(p)
+  assert w < wildcards.len
+  for i in 0 ..< wildToken[w]-1:  # add first enumerator with (, ), and .
+    if p.tok[p.idx + i].symbol == "#":
+      result.text.add "1"
    else:
-      dec p.idx, wildpos[w] + 3
-      result = nil
+      result.text.add p.tok[p.idx + i].symbol
+  var prevEnum = p.tok[p.idx + wildIndex[w]].symbol
+  inc p.idx, wildToken[w]
+  while true:
+    var item = newRstNode(rnEnumItem)
+    pushInd(p, currentTok(p).col)
+    parseSection(p, item)
+    popInd(p)
+    result.add(item)
+    if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
+        match(p, p.idx+1, wildcards[w]):
+      let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
+      # check that it's in sequence: enumerator == next(prevEnum)
+      if "n" in wildcards[w]:  # arabic numeral
+        let prevEnumI = try: parseInt(prevEnum) except: 1
+        let curEnum =
+          if enumerator == "#": prevEnumI + 1
+          else: (try: parseInt(enumerator) except: 1)
+        if curEnum - prevEnumI != 1:
+          break
+        prevEnum = enumerator
+      else:  # a..z
+        let prevEnumI = ord(prevEnum[0])
+        let curEnum =
+          if enumerator == "#": prevEnumI + 1
+          else: ord(enumerator[0])
+        if curEnum - prevEnumI != 1:
+          break
+        prevEnum = $chr(curEnum)
+      inc p.idx, 1 + wildToken[w]
+    else:
+      break

 proc sonKind(father: PRstNode, i: int): RstNodeKind =
  result = rnLeaf
@@ -1511,6 +1592,8 @@ proc parseSection(p: var RstParser, result: PRstNode) =
        result.add(a)
        popInd(p)
      else:
+        while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
+          inc p.idx  # skip blank lines
        leave = true
        break
    if leave or currentTok(p).kind == tkEof: break
--- a/lib/packages/docutils/rstast.nim
+++ b/lib/packages/docutils/rstast.nim
@@ -69,7 +69,7 @@ type
  RstNode* {.acyclic, final.} = object ## an RST node's description
    kind*: RstNodeKind       ## the node's kind
    text*: string             ## valid for leafs in the AST; and the title of
-                              ## the document or the section
+                              ## the document or the section; and rnEnumList
    level*: int               ## valid for some node kinds
    sons*: RstNodeSeq        ## the node's sons

--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -1029,6 +1029,56 @@ proc renderField(d: PDoc, n: PRstNode, result: var string) =
  if not b:
    renderAux(d, n, "<tr>$1</tr>\n", "$1", result)

+proc renderEnumList(d: PDoc, n: PRstNode, result: var string) =
+  var
+    specifier = ""
+    specStart = ""
+    i1 = 0
+    pre = ""
+    i2 = n.text.len-1
+    post = ""
+  if n.text[0] == '(':
+    i1 = 1
+    pre = "("
+  if n.text[^1] == ')' or n.text[^1] == '.':
+    i2 = n.text.len-2
+    post = $n.text[^1]
+  let enumR = i1 .. i2  # enumerator range without surrounding (, ), .
+  if d.target == outLatex:
+    result.add ("\n%"&n.text&"\n")
+    # use enumerate parameters from package enumitem
+    if n.text[i1].isDigit:
+      var labelDef = ""
+      if pre != "" or post != "":
+        labelDef = "label=" & pre & "\\arabic*" & post & ","
+      if n.text[enumR] != "1":
+        specStart = "start=$1" % [n.text[enumR]]
+      if labelDef != "" or specStart != "":
+        specifier = "[$1$2]" % [labelDef, specStart]
+    else:
+      let (first, labelDef) =
+        if n.text[i1].isUpperAscii: ('A', "label=" & pre & "\\Alph*" & post)
+        else: ('a', "label=" & pre & "\\alph*" & post)
+      if n.text[i1] != first:
+        specStart = ",start=" & $(ord(n.text[i1]) - ord(first) + 1)
+      specifier = "[$1$2]" % [labelDef, specStart]
+  else:  # HTML
+    # TODO: implement enumerator formatting using pre and post ( and ) for HTML
+    if n.text[i1].isDigit:
+      if n.text[enumR] != "1":
+        specStart = " start=\"$1\"" % [n.text[enumR]]
+      specifier = "class=\"simple\"" & specStart
+    else:
+      let (first, labelDef) =
+        if n.text[i1].isUpperAscii: ('A', "class=\"upperalpha simple\"")
+        else: ('a', "class=\"loweralpha simple\"")
+      if n.text[i1] != first:
+        specStart = " start=\"$1\"" % [ $(ord(n.text[i1]) - ord(first) + 1) ]
+      specifier = labelDef & specStart
+  renderAux(d, n, "<ol " & specifier & ">$1</ol>\n",
+            "\\begin{enumerate}" & specifier & "$1\\end{enumerate}\n",
+            result)
+
 proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
  if n == nil: return
  case n.kind
@@ -1042,9 +1092,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
                    "\\begin{itemize}$1\\end{itemize}\n", result)
  of rnBulletItem, rnEnumItem:
    renderAux(d, n, "<li>$1</li>\n", "\\item $1\n", result)
-  of rnEnumList:
-    renderAux(d, n, "<ol class=\"simple\">$1</ol>\n",
-                    "\\begin{enumerate}$1\\end{enumerate}\n", result)
+  of rnEnumList: renderEnumList(d, n, result)
  of rnDefList:
    renderAux(d, n, "<dl class=\"docutils\">$1</dl>\n",
                       "\\begin{description}$1\\end{description}\n", result)
--- a/nimdoc/rst2html/expected/rst_examples.html
+++ b/nimdoc/rst2html/expected/rst_examples.html
@@ -274,15 +274,17 @@ stmt = IND{&gt;} stmt ^+ IND{=} DED  # list of statements
 <li>An input parameter should not be aliased with a global or thread local variable updated by the called proc.</li>
 </ol>
 <p>One problem with rules 3 and 4 is that they affect specific global or thread local variables, but Nim's effect tracking only tracks &quot;uses no global variable&quot; via <tt class="docutils literal"><span class="pre">.noSideEffect</span></tt>. The rules 3 and 4 can also be approximated by a different rule:</p>
-<ol class="simple"><li>A global or thread local variable (or a location derived from such a location) can only passed to a parameter of a <tt class="docutils literal"><span class="pre">.noSideEffect</span></tt> proc.</li>
+<ol class="simple" start="5"><li>A global or thread local variable (or a location derived from such a location) can only passed to a parameter of a <tt class="docutils literal"><span class="pre">.noSideEffect</span></tt> proc.</li>
 </ol>
 <p>These two procs are the two modus operandi of the real-time garbage collector:</p>
-<p>(1) GC_SetMaxPause Mode</p>
-<blockquote><p>You can call <tt class="docutils literal"><span class="pre">GC_SetMaxPause</span></tt> at program startup and then each triggered garbage collector run tries to not take longer than <tt class="docutils literal"><span class="pre">maxPause</span></tt> time. However, it is possible (and common) that the work is nevertheless not evenly distributed as each call to <tt class="docutils literal"><span class="pre">new</span></tt> can trigger the garbage collector and thus take  <tt class="docutils literal"><span class="pre">maxPause</span></tt> time.</p></blockquote>
-<p>(2) GC_step Mode</p>
-<blockquote><p><p>This allows the garbage collector to perform some work for up to <tt class="docutils literal"><span class="pre">us</span></tt> time. This is useful to call in the main loop to ensure the garbage collector can do its work. To bind all garbage collector activity to a <tt class="docutils literal"><span class="pre">GC_step</span></tt> call, deactivate the garbage collector with <tt class="docutils literal"><span class="pre">GC_disable</span></tt> at program startup. If <tt class="docutils literal"><span class="pre">strongAdvice</span></tt> is set to <tt class="docutils literal"><span class="pre">true</span></tt>, then the garbage collector will be forced to perform the collection cycle. Otherwise, the garbage collector may decide not to do anything, if there is not much garbage to collect. You may also specify the current stack size via <tt class="docutils literal"><span class="pre">stackSize</span></tt> parameter. It can improve performance when you know that there are no unique Nim references below a certain point on the stack. Make sure the size you specify is greater than the potential worst-case size.</p>
+<ol class="simple"><li><p>GC_SetMaxPause Mode</p>
+<p>You can call <tt class="docutils literal"><span class="pre">GC_SetMaxPause</span></tt> at program startup and then each triggered garbage collector run tries to not take longer than <tt class="docutils literal"><span class="pre">maxPause</span></tt> time. However, it is possible (and common) that the work is nevertheless not evenly distributed as each call to <tt class="docutils literal"><span class="pre">new</span></tt> can trigger the garbage collector and thus take  <tt class="docutils literal"><span class="pre">maxPause</span></tt> time.</p>
+</li>
+<li><p>GC_step Mode</p>
+<p>This allows the garbage collector to perform some work for up to <tt class="docutils literal"><span class="pre">us</span></tt> time. This is useful to call in the main loop to ensure the garbage collector can do its work. To bind all garbage collector activity to a <tt class="docutils literal"><span class="pre">GC_step</span></tt> call, deactivate the garbage collector with <tt class="docutils literal"><span class="pre">GC_disable</span></tt> at program startup. If <tt class="docutils literal"><span class="pre">strongAdvice</span></tt> is set to <tt class="docutils literal"><span class="pre">true</span></tt>, then the garbage collector will be forced to perform the collection cycle. Otherwise, the garbage collector may decide not to do anything, if there is not much garbage to collect. You may also specify the current stack size via <tt class="docutils literal"><span class="pre">stackSize</span></tt> parameter. It can improve performance when you know that there are no unique Nim references below a certain point on the stack. Make sure the size you specify is greater than the potential worst-case size.</p>
 <p>It can improve performance when you know that there are no unique Nim references below a certain point on the stack. Make sure the size you specify is greater than the potential worst-case size.</p>
-</p></blockquote>
+</li>
+</ol>
 <p>These procs provide a &quot;best effort&quot; real-time guarantee; in particular the cycle collector is not aware of deadlines. Deactivate it to get more predictable real-time behaviour. Tests show that a 1ms max pause time will be met in almost all cases on modern CPUs (with the cycle collector disabled).</p>

 <h2><a class="toc-backref" id="code-reordering-time-measurement-with-garbage-collectors" href="#code-reordering-time-measurement-with-garbage-collectors">Time measurement with garbage collectors</a></h2><p>The garbage collectors' way of measuring time uses (see <tt class="docutils literal"><span class="pre">lib/system/timers.nim</span></tt> for the implementation):</p>
--- a/tests/stdlib/trstgen.nim
+++ b/tests/stdlib/trstgen.nim
@@ -361,6 +361,144 @@ Test1
    assert "line block\\\\" in output1l
    assert "other line\\\\" in output1l

+  test "RST enumerated lists":
+    let input1 = dedent """
+      1. line1
+         1
+      2. line2
+         2
+
+      3. line3
+         3
+
+
+      4. line4
+         4
+
+
+
+      5. line5
+         5
+      """
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    for i in 1..5:
+      assert ($i & ". line" & $i) notin output1
+      assert ("<li>line" & $i & " " & $i & "</li>") in output1
+
+    let input2 = dedent """
+      3. line3
+
+      4. line4
+
+
+      5. line5
+
+
+
+      7. line7
+
+
+
+
+      8. line8
+      """
+    let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
+    for i in [3, 4, 5, 7, 8]:
+      assert ($i & ". line" & $i) notin output2
+      assert ("<li>line" & $i & "</li>") in output2
+
+    # check that nested enumerated lists work
+    let input3 = dedent """
+      1.  a) string1
+      2. string2
+      """
+    let output3 = rstToHtml(input3, {roSupportMarkdown}, defaultConfig())
+    assert count(output3, "<ol ") == 2
+    assert count(output3, "</ol>") == 2
+    assert "<li>string1</li>" in output3 and "<li>string2</li>" in output3
+
+    let input4 = dedent """
+      Check that enumeration specifiers are respected
+
+      9. string1
+      10. string2
+      12. string3
+
+      b) string4
+      c) string5
+      e) string6
+      """
+    let output4 = rstToHtml(input4, {roSupportMarkdown}, defaultConfig())
+    assert count(output4, "<ol ") == 4
+    assert count(output4, "</ol>") == 4
+    for enumerator in [9, 12]:
+      assert "start=\"$1\"" % [$enumerator] in output4
+    for enumerator in [2, 5]:  # 2=b, 5=e
+      assert "start=\"$1\"" % [$enumerator] in output4
+
+    let input5 = dedent """
+      Check that auto-numbered enumeration lists work.
+
+      #. string1
+      #. string2
+      #. string3
+      """
+    let output5 = rstToHtml(input5, {roSupportMarkdown}, defaultConfig())
+    assert count(output5, "<ol ") == 1
+    assert count(output5, "</ol>") == 1
+    assert count(output5, "<li>") == 3
+
+    let input6 = dedent """
+      ... And for alphabetic enumerators too!
+
+      b. string1
+      #. string2
+      #. string3
+      """
+    let output6 = rstToHtml(input6, {roSupportMarkdown}, defaultConfig())
+    assert count(output6, "<ol ") == 1
+    assert count(output6, "</ol>") == 1
+    assert count(output6, "<li>") == 3
+    assert "start=\"2\"" in output6 and "class=\"loweralpha simple\"" in output6
+
+    let input7 = dedent """
+      ... And for uppercase alphabetic enumerators.
+
+      C. string1
+      #. string2
+      #. string3
+      """
+    let output7 = rstToHtml(input7, {roSupportMarkdown}, defaultConfig())
+    assert count(output7, "<ol ") == 1
+    assert count(output7, "</ol>") == 1
+    assert count(output7, "<li>") == 3
+    assert "start=\"3\"" in output7 and "class=\"upperalpha simple\"" in output7
+
+  test "RST bullet lists":
+    let input1 = dedent """
+      * line1
+        1
+      * line2
+        2
+
+      * line3
+        3
+
+
+      * line4
+        4
+
+
+
+      * line5
+        5
+      """
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    for i in 1..5:
+      assert ("<li>line" & $i & " " & $i & "</li>") in output1
+    assert count(output1, "<ul ") == 1
+    assert count(output1, "</ul>") == 1
+
 suite "RST/Code highlight":
  test "Basic Python code highlight":
    let pythonCode = """