doc/rst2html: some few fixes for enumerated and bullet lists (#16295)

* fix bullet/enumerated lists with many blank lines
* fix enumerated list parsing
* fix parse failure when next line after list empty
* implement arbitrary start of enumerator
* check that enumerators are in order
* remove redundant start=x if x=1 or a
* add some doc on implemented features
* update start in rst_examples.rst
* allow upper-case letters + more docs
This commit is contained in:
Andrey Makarov
2020-12-14 20:10:39 +03:00
committed by GitHub
parent 2728711dd3
commit e843492b13
6 changed files with 309 additions and 37 deletions

View File

@@ -50,6 +50,7 @@ doc.file = """
\usepackage{fancyvrb, courier}
\usepackage{tabularx}
\usepackage{hyperref}
\usepackage{enumitem}
\begin{document}
\title{$title $version}

View File

@@ -11,6 +11,59 @@
## subset is implemented. Some features of the `markdown`:idx: wiki syntax are
## also supported.
##
## Supported RST features:
##
## * body elements
## + sections
## + transitions
## + paragraphs
## + bullet lists using \+, \*, \-
## + enumerated lists using arabic numerals or alphabet
## characters: 1. ... 2. ... *or* a. ... b. ... *or* A. ... B. ...
## + definition lists
## + field lists
## + option lists
## + indented literal blocks
## + simple tables
## + directives
## - image, figure
## - code-block
## - substitution definitions: replace and image
## - ... a few more
## + comments
## * inline markup
## + *emphasis*, **strong emphasis**, `interpreted text`,
## ``inline literals``, hyperlink references, substitution references,
## standalone hyperlinks
##
## Additional features:
##
## * ***triple emphasis*** (bold and italic) using \*\*\*
##
## Optional additional features, turned on by ``options: RstParseOption`` in
## `rstParse proc <#rstParse,string,string,int,int,bool,RstParseOptions,FindFileHandler,MsgHandler>`_:
##
## * emoji / smiley symbols
## * markdown tables
## * markdown code blocks
## * markdown links
## * markdown headlines
##
## Limitations:
##
## * no Unicode support in character width calculations
## * body elements
## - no roman numerals in enumerated lists
## - no quoted literal blocks
## - no doctest blocks
## - no grid tables
## - directives: no support for admonitions (notes, caution)
## - no footnotes & citations support
## - no inline internal targets
## * inline markup
## - no simple-inline-markup
## - no embedded URI and aliases
##
## **Note:** Import ``packages/docutils/rst`` to use this module
import
@@ -569,7 +622,9 @@ proc match(p: RstParser, start: int, expr: string): bool =
# 'p' tkPunct
# 'T' always true
# 'E' whitespace, indent or eof
# 'e' tkWord or '#' (for enumeration lists)
# 'e' any enumeration sequence or '#' (for enumeration lists)
# 'x' a..z, A..Z (single letter) or '#' (for enumeration lists)
# 'n' 0..9 or '#' (for enumeration lists)
var i = 0
var j = start
var last = expr.len - 1
@@ -583,12 +638,16 @@ proc match(p: RstParser, start: int, expr: string): bool =
of 'o': result = p.tok[j].kind == tkOther
of 'T': result = true
of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent}
of 'e':
of 'e', 'x', 'n':
result = p.tok[j].kind == tkWord or p.tok[j].symbol == "#"
if result:
case p.tok[j].symbol[0]
of 'a'..'z', 'A'..'Z', '#': result = p.tok[j].symbol.len == 1
of '0'..'9': result = allCharsInSet(p.tok[j].symbol, {'0'..'9'})
of '#': result = true
of 'a'..'z', 'A'..'Z':
result = expr[i] in {'e', 'x'} and p.tok[j].symbol.len == 1
of '0'..'9':
result = expr[i] in {'e', 'n'} and
allCharsInSet(p.tok[j].symbol, {'0'..'9'})
else: result = false
else:
var c = expr[i]
@@ -1465,33 +1524,55 @@ proc parseDefinitionList(p: var RstParser): PRstNode =
proc parseEnumList(p: var RstParser): PRstNode =
## Parses an enumerated list starting at the current token and returns it as
## an `rnEnumList` node with one `rnEnumItem` son per list item.
##
## Recognized enumerators are a number ('n') or a single letter ('x'),
## written as `(n)`, `n)`, `n.` / `(x)`, `x)`, `x.`; `#` is accepted in place
## of either and means "next value". The first enumerator, including its
## punctuation, is stored in `result.text` (with `#` recorded as `1`).
## The list ends as soon as the next enumerator is not the direct successor
## of the previous one.
##
## NOTE(review): this view interleaves the pre-change and post-change lines
## of the proc (two `wildcards` constants, two `while` headers, the old
## `wildpos`-based loop); read it as a diff, not as compilable code.
const
wildcards: array[0..2, string] = ["(e) ", "e) ", "e. "]
wildpos: array[0..2, int] = [1, 0, 0]
result = nil
wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ",
"(x) ", "x) ", "x. "]
# enumerator patterns, where 'x' means letter and 'n' means number
wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3] # number of tokens
wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
# position of enumeration sequence (number/letter) in enumerator
result = newRstNode(rnEnumList)
let col = currentTok(p).col
var w = 0
while w <= 2:
while w < wildcards.len:
if match(p, p.idx, wildcards[w]): break
inc w
if w <= 2:
var col = currentTok(p).col
result = newRstNode(rnEnumList)
inc p.idx, wildpos[w] + 3
var j = tokenAfterNewline(p)
if p.tok[j].col == currentTok(p).col or match(p, j, wildcards[w]):
pushInd(p, currentTok(p).col)
while true:
var item = newRstNode(rnEnumItem)
parseSection(p, item)
result.add(item)
if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
match(p, p.idx + 1, wildcards[w]):
inc p.idx, wildpos[w] + 4
else:
break
popInd(p)
# one of the patterns must have matched, otherwise `w` indexes past the arrays
assert w < wildcards.len
for i in 0 ..< wildToken[w]-1: # add first enumerator with (, ), and .
if p.tok[p.idx + i].symbol == "#":
result.text.add "1"
else:
dec p.idx, wildpos[w] + 3
result = nil
result.text.add p.tok[p.idx + i].symbol
# remember the first enumerator; later ones are checked against it
var prevEnum = p.tok[p.idx + wildIndex[w]].symbol
# step over the whole enumerator (all of its tokens)
inc p.idx, wildToken[w]
while true:
var item = newRstNode(rnEnumItem)
pushInd(p, currentTok(p).col)
parseSection(p, item)
popInd(p)
result.add(item)
# another item follows only if the next line is indented to the list's
# column and starts with an enumerator of the same pattern
if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
match(p, p.idx+1, wildcards[w]):
let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
# check that it's in sequence: enumerator == next(prevEnum)
if "n" in wildcards[w]: # arabic numeral
# a value that does not parse as an integer is treated as 1
let prevEnumI = try: parseInt(prevEnum) except: 1
let curEnum =
if enumerator == "#": prevEnumI + 1
else: (try: parseInt(enumerator) except: 1)
if curEnum - prevEnumI != 1:
# not the direct successor: end this list, enumerator is not consumed
break
# NOTE(review): when `enumerator` is "#", `prevEnum` becomes "#" and the
# next `parseInt(prevEnum)` falls back to 1 instead of the real previous
# number; the letter branch below stores the resolved value instead.
# Confirm whether `$curEnum` was intended here.
prevEnum = enumerator
else: # single letter: a..z or A..Z
let prevEnumI = ord(prevEnum[0])
let curEnum =
if enumerator == "#": prevEnumI + 1
else: ord(enumerator[0])
if curEnum - prevEnumI != 1:
# not the direct successor: end this list, enumerator is not consumed
break
# store the resolved letter so that a following '#' continues from it
prevEnum = $chr(curEnum)
# skip the indent token plus the enumerator tokens
inc p.idx, 1 + wildToken[w]
else:
break
proc sonKind(father: PRstNode, i: int): RstNodeKind =
result = rnLeaf
@@ -1511,6 +1592,8 @@ proc parseSection(p: var RstParser, result: PRstNode) =
result.add(a)
popInd(p)
else:
while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
inc p.idx # skip blank lines
leave = true
break
if leave or currentTok(p).kind == tkEof: break

View File

@@ -69,7 +69,7 @@ type
RstNode* {.acyclic, final.} = object ## an RST node's description
kind*: RstNodeKind ## the node's kind
text*: string ## valid for leafs in the AST; and the title of
## the document or the section
## the document or the section; and rnEnumList
level*: int ## valid for some node kinds
sons*: RstNodeSeq ## the node's sons

View File

@@ -1029,6 +1029,56 @@ proc renderField(d: PDoc, n: PRstNode, result: var string) =
if not b:
renderAux(d, n, "<tr>$1</tr>\n", "$1", result)
proc renderEnumList(d: PDoc, n: PRstNode, result: var string) =
  ## Renders the enumerated list `n` as an HTML ``<ol>`` or as a LaTeX
  ## ``enumerate`` environment, depending on ``d.target``, and appends the
  ## output to `result`.
  ##
  ## ``n.text`` is read as the first enumerator of the list including its
  ## punctuation, e.g. ``3.``, ``(b)`` or ``C)``. From it are derived:
  ##
  ## * the numbering style: arabic, lower-case or upper-case letters;
  ## * the start value, emitted only when it differs from ``1`` / ``a`` /
  ##   ``A``;
  ## * for LaTeX only, the label decoration (surrounding ``(``, ``)``, ``.``),
  ##   expressed with `enumitem` options.
  ##
  ## When ``n.text`` contains no enumerator at all (empty or punctuation
  ## only) the list is rendered with the plain default markup instead of
  ## indexing past the end of the string.
  var
    specifier = ""
    specStart = ""
    i1 = 0
    pre = ""
    i2 = n.text.len - 1
    post = ""
  if n.text.len > 0 and n.text[0] == '(':
    i1 = 1
    pre = "("
  if n.text.len > 0 and n.text[^1] in {')', '.'}:
    i2 = n.text.len - 2
    post = $n.text[^1]
  if i1 > i2:
    # No enumerator characters available: nothing to derive style or start
    # value from, so emit an ordinary list.
    renderAux(d, n, "<ol class=\"simple\">$1</ol>\n",
              "\\begin{enumerate}$1\\end{enumerate}\n", result)
    return
  let enumR = i1 .. i2 # enumerator range without surrounding (, ), .
  if d.target == outLatex:
    # leave the original enumerator in the output as a LaTeX comment
    result.add("\n%" & n.text & "\n")
    # use enumerate parameters from package enumitem
    if n.text[i1].isDigit:
      var labelDef = ""
      if pre != "" or post != "":
        labelDef = "label=" & pre & "\\arabic*" & post & ","
      if n.text[enumR] != "1":
        specStart = "start=$1" % [n.text[enumR]]
      # plain "1" without decoration needs no options at all
      if labelDef != "" or specStart != "":
        specifier = "[$1$2]" % [labelDef, specStart]
    else:
      let (first, labelDef) =
        if n.text[i1].isUpperAscii: ('A', "label=" & pre & "\\Alph*" & post)
        else: ('a', "label=" & pre & "\\alph*" & post)
      if n.text[i1] != first:
        # letters are converted to their 1-based position in the alphabet
        specStart = ",start=" & $(ord(n.text[i1]) - ord(first) + 1)
      specifier = "[$1$2]" % [labelDef, specStart]
  else: # HTML
    # TODO: implement enumerator formatting using pre and post ( and ) for HTML
    if n.text[i1].isDigit:
      if n.text[enumR] != "1":
        specStart = " start=\"$1\"" % [n.text[enumR]]
      specifier = "class=\"simple\"" & specStart
    else:
      let (first, labelDef) =
        if n.text[i1].isUpperAscii: ('A', "class=\"upperalpha simple\"")
        else: ('a', "class=\"loweralpha simple\"")
      if n.text[i1] != first:
        # letters are converted to their 1-based position in the alphabet
        specStart = " start=\"$1\"" % [$(ord(n.text[i1]) - ord(first) + 1)]
      specifier = labelDef & specStart
  renderAux(d, n, "<ol " & specifier & ">$1</ol>\n",
            "\\begin{enumerate}" & specifier & "$1\\end{enumerate}\n",
            result)
proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
if n == nil: return
case n.kind
@@ -1042,9 +1092,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
"\\begin{itemize}$1\\end{itemize}\n", result)
of rnBulletItem, rnEnumItem:
renderAux(d, n, "<li>$1</li>\n", "\\item $1\n", result)
of rnEnumList:
renderAux(d, n, "<ol class=\"simple\">$1</ol>\n",
"\\begin{enumerate}$1\\end{enumerate}\n", result)
of rnEnumList: renderEnumList(d, n, result)
of rnDefList:
renderAux(d, n, "<dl class=\"docutils\">$1</dl>\n",
"\\begin{description}$1\\end{description}\n", result)

View File

@@ -274,15 +274,17 @@ stmt = IND{&gt;} stmt ^+ IND{=} DED # list of statements
<li>An input parameter should not be aliased with a global or thread local variable updated by the called proc.</li>
</ol>
<p>One problem with rules 3 and 4 is that they affect specific global or thread local variables, but Nim's effect tracking only tracks &quot;uses no global variable&quot; via <tt class="docutils literal"><span class="pre">.noSideEffect</span></tt>. The rules 3 and 4 can also be approximated by a different rule:</p>
<ol class="simple"><li>A global or thread local variable (or a location derived from such a location) can only passed to a parameter of a <tt class="docutils literal"><span class="pre">.noSideEffect</span></tt> proc.</li>
<ol class="simple" start="5"><li>A global or thread local variable (or a location derived from such a location) can only passed to a parameter of a <tt class="docutils literal"><span class="pre">.noSideEffect</span></tt> proc.</li>
</ol>
<p>These two procs are the two modus operandi of the real-time garbage collector:</p>
<p>(1) GC_SetMaxPause Mode</p>
<blockquote><p>You can call <tt class="docutils literal"><span class="pre">GC_SetMaxPause</span></tt> at program startup and then each triggered garbage collector run tries to not take longer than <tt class="docutils literal"><span class="pre">maxPause</span></tt> time. However, it is possible (and common) that the work is nevertheless not evenly distributed as each call to <tt class="docutils literal"><span class="pre">new</span></tt> can trigger the garbage collector and thus take <tt class="docutils literal"><span class="pre">maxPause</span></tt> time.</p></blockquote>
<p>(2) GC_step Mode</p>
<blockquote><p><p>This allows the garbage collector to perform some work for up to <tt class="docutils literal"><span class="pre">us</span></tt> time. This is useful to call in the main loop to ensure the garbage collector can do its work. To bind all garbage collector activity to a <tt class="docutils literal"><span class="pre">GC_step</span></tt> call, deactivate the garbage collector with <tt class="docutils literal"><span class="pre">GC_disable</span></tt> at program startup. If <tt class="docutils literal"><span class="pre">strongAdvice</span></tt> is set to <tt class="docutils literal"><span class="pre">true</span></tt>, then the garbage collector will be forced to perform the collection cycle. Otherwise, the garbage collector may decide not to do anything, if there is not much garbage to collect. You may also specify the current stack size via <tt class="docutils literal"><span class="pre">stackSize</span></tt> parameter. It can improve performance when you know that there are no unique Nim references below a certain point on the stack. Make sure the size you specify is greater than the potential worst-case size.</p>
<ol class="simple"><li><p>GC_SetMaxPause Mode</p>
<p>You can call <tt class="docutils literal"><span class="pre">GC_SetMaxPause</span></tt> at program startup and then each triggered garbage collector run tries to not take longer than <tt class="docutils literal"><span class="pre">maxPause</span></tt> time. However, it is possible (and common) that the work is nevertheless not evenly distributed as each call to <tt class="docutils literal"><span class="pre">new</span></tt> can trigger the garbage collector and thus take <tt class="docutils literal"><span class="pre">maxPause</span></tt> time.</p>
</li>
<li><p>GC_step Mode</p>
<p>This allows the garbage collector to perform some work for up to <tt class="docutils literal"><span class="pre">us</span></tt> time. This is useful to call in the main loop to ensure the garbage collector can do its work. To bind all garbage collector activity to a <tt class="docutils literal"><span class="pre">GC_step</span></tt> call, deactivate the garbage collector with <tt class="docutils literal"><span class="pre">GC_disable</span></tt> at program startup. If <tt class="docutils literal"><span class="pre">strongAdvice</span></tt> is set to <tt class="docutils literal"><span class="pre">true</span></tt>, then the garbage collector will be forced to perform the collection cycle. Otherwise, the garbage collector may decide not to do anything, if there is not much garbage to collect. You may also specify the current stack size via <tt class="docutils literal"><span class="pre">stackSize</span></tt> parameter. It can improve performance when you know that there are no unique Nim references below a certain point on the stack. Make sure the size you specify is greater than the potential worst-case size.</p>
<p>It can improve performance when you know that there are no unique Nim references below a certain point on the stack. Make sure the size you specify is greater than the potential worst-case size.</p>
</p></blockquote>
</li>
</ol>
<p>These procs provide a &quot;best effort&quot; real-time guarantee; in particular the cycle collector is not aware of deadlines. Deactivate it to get more predictable real-time behaviour. Tests show that a 1ms max pause time will be met in almost all cases on modern CPUs (with the cycle collector disabled).</p>
<h2><a class="toc-backref" id="code-reordering-time-measurement-with-garbage-collectors" href="#code-reordering-time-measurement-with-garbage-collectors">Time measurement with garbage collectors</a></h2><p>The garbage collectors' way of measuring time uses (see <tt class="docutils literal"><span class="pre">lib/system/timers.nim</span></tt> for the implementation):</p>

View File

@@ -361,6 +361,144 @@ Test1
assert "line block\\\\" in output1l
assert "other line\\\\" in output1l
test "RST enumerated lists":
# NOTE(review): leading whitespace inside the `dedent` literals is not
# visible in this view; the comments below rely on the assertions only.

# every item has a continuation line that must end up in the same <li>,
# and the enumerator itself must not appear in the output
let input1 = dedent """
1. line1
1
2. line2
2
3. line3
3
4. line4
4
5. line5
5
"""
let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
for i in 1..5:
assert ($i & ". line" & $i) notin output1
assert ("<li>line" & $i & " " & $i & "</li>") in output1
# numbering starts at 3 and skips 6: all items must still be rendered as
# list items without their enumerators
let input2 = dedent """
3. line3
4. line4
5. line5
7. line7
8. line8
"""
let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
for i in [3, 4, 5, 7, 8]:
assert ($i & ". line" & $i) notin output2
assert ("<li>line" & $i & "</li>") in output2
# check that nested enumerated lists work
let input3 = dedent """
1. a) string1
2. string2
"""
let output3 = rstToHtml(input3, {roSupportMarkdown}, defaultConfig())
assert count(output3, "<ol ") == 2
assert count(output3, "</ol>") == 2
assert "<li>string1</li>" in output3 and "<li>string2</li>" in output3
# out-of-sequence enumerators (10 -> 12, c -> e) are expected to yield four
# separate lists, each carrying its own non-default start value
let input4 = dedent """
Check that enumeration specifiers are respected
9. string1
10. string2
12. string3
b) string4
c) string5
e) string6
"""
let output4 = rstToHtml(input4, {roSupportMarkdown}, defaultConfig())
assert count(output4, "<ol ") == 4
assert count(output4, "</ol>") == 4
for enumerator in [9, 12]:
assert "start=\"$1\"" % [$enumerator] in output4
for enumerator in [2, 5]: # 2=b, 5=e
assert "start=\"$1\"" % [$enumerator] in output4
# '#' enumerators only: one list with three items
let input5 = dedent """
Check that auto-numbered enumeration lists work.
#. string1
#. string2
#. string3
"""
let output5 = rstToHtml(input5, {roSupportMarkdown}, defaultConfig())
assert count(output5, "<ol ") == 1
assert count(output5, "</ol>") == 1
assert count(output5, "<li>") == 3
# lower-case letter followed by '#': one list, start value 2 (= b) and the
# lower-alpha class
let input6 = dedent """
... And for alphabetic enumerators too!
b. string1
#. string2
#. string3
"""
let output6 = rstToHtml(input6, {roSupportMarkdown}, defaultConfig())
assert count(output6, "<ol ") == 1
assert count(output6, "</ol>") == 1
assert count(output6, "<li>") == 3
assert "start=\"2\"" in output6 and "class=\"loweralpha simple\"" in output6
# upper-case letter followed by '#': one list, start value 3 (= C) and the
# upper-alpha class
let input7 = dedent """
... And for uppercase alphabetic enumerators.
C. string1
#. string2
#. string3
"""
let output7 = rstToHtml(input7, {roSupportMarkdown}, defaultConfig())
assert count(output7, "<ol ") == 1
assert count(output7, "</ol>") == 1
assert count(output7, "<li>") == 3
assert "start=\"3\"" in output7 and "class=\"upperalpha simple\"" in output7
test "RST bullet lists":
# every bullet item has a continuation line that must end up in the same
# <li>, and all five items must stay in a single <ul>
# NOTE(review): leading whitespace inside the `dedent` literal is not
# visible in this view.
let input1 = dedent """
* line1
1
* line2
2
* line3
3
* line4
4
* line5
5
"""
let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
for i in 1..5:
assert ("<li>line" & $i & " " & $i & "</li>") in output1
assert count(output1, "<ul ") == 1
assert count(output1, "</ul>") == 1
suite "RST/Code highlight":
test "Basic Python code highlight":
let pythonCode = """