doc/rst2html: a few fixes for enumerated and bullet lists (#16295)

* fix bullet/enumerated lists with many blank lines
* fix enumerated list parsing
* fix parse failure when next line after list empty
* implement arbitrary start of enumerator
* check that enumerators are in order
* remove redundant start=x if x=1 or a
* add some doc on implemented features
* update start in rst_examples.rst
* allow upper-case letters + more docs
This commit is contained in:
Andrey Makarov
2020-12-14 20:10:39 +03:00
committed by GitHub
parent 2728711dd3
commit e843492b13
6 changed files with 309 additions and 37 deletions

View File

@@ -11,6 +11,59 @@
## subset is implemented. Some features of the `markdown`:idx: wiki syntax are
## also supported.
##
## Supported RST features:
##
## * body elements
## + sections
## + transitions
## + paragraphs
## + bullet lists using \+, \*, \-
## + enumerated lists using arabic numerals or alphabet
## characters: 1. ... 2. ... *or* a. ... b. ... *or* A. ... B. ...
## + definition lists
## + field lists
## + option lists
## + indented literal blocks
## + simple tables
## + directives
## - image, figure
## - code-block
## - substitution definitions: replace and image
## - ... a few more
## + comments
## * inline markup
## + *emphasis*, **strong emphasis**, `interpreted text`,
## ``inline literals``, hyperlink references, substitution references,
## standalone hyperlinks
##
## Additional features:
##
## * ***triple emphasis*** (bold and italic) using \*\*\*
##
## Optional additional features, turned on by ``options: RstParseOption`` in
## `rstParse proc <#rstParse,string,string,int,int,bool,RstParseOptions,FindFileHandler,MsgHandler>`_:
##
## * emoji / smiley symbols
## * markdown tables
## * markdown code blocks
## * markdown links
## * markdown headlines
##
## Limitations:
##
## * no Unicode support in character width calculations
## * body elements
## - no roman numerals in enumerated lists
## - no quoted literal blocks
## - no doctest blocks
## - no grid tables
## - directives: no support for admonitions (notes, caution)
## - no footnotes & citations support
## - no inline internal targets
## * inline markup
## - no simple-inline-markup
## - no embedded URI and aliases
##
## **Note:** Import ``packages/docutils/rst`` to use this module
import
@@ -569,7 +622,9 @@ proc match(p: RstParser, start: int, expr: string): bool =
# 'p' tkPunct
# 'T' always true
# 'E' whitespace, indent or eof
# 'e' tkWord or '#' (for enumeration lists)
# 'e' any enumeration sequence or '#' (for enumeration lists)
# 'x' a..z, A..Z or '#' (for enumeration lists)
# 'n' 0..9 or '#' (for enumeration lists)
var i = 0
var j = start
var last = expr.len - 1
@@ -583,12 +638,16 @@ proc match(p: RstParser, start: int, expr: string): bool =
of 'o': result = p.tok[j].kind == tkOther
of 'T': result = true
of 'E': result = p.tok[j].kind in {tkEof, tkWhite, tkIndent}
of 'e':
of 'e', 'x', 'n':
result = p.tok[j].kind == tkWord or p.tok[j].symbol == "#"
if result:
case p.tok[j].symbol[0]
of 'a'..'z', 'A'..'Z', '#': result = p.tok[j].symbol.len == 1
of '0'..'9': result = allCharsInSet(p.tok[j].symbol, {'0'..'9'})
of '#': result = true
of 'a'..'z', 'A'..'Z':
result = expr[i] in {'e', 'x'} and p.tok[j].symbol.len == 1
of '0'..'9':
result = expr[i] in {'e', 'n'} and
allCharsInSet(p.tok[j].symbol, {'0'..'9'})
else: result = false
else:
var c = expr[i]
@@ -1465,33 +1524,55 @@ proc parseDefinitionList(p: var RstParser): PRstNode =
proc parseEnumList(p: var RstParser): PRstNode =
const
wildcards: array[0..2, string] = ["(e) ", "e) ", "e. "]
wildpos: array[0..2, int] = [1, 0, 0]
result = nil
wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ",
"(x) ", "x) ", "x. "]
# enumerator patterns, where 'x' means letter and 'n' means number
wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3] # number of tokens
wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
# position of enumeration sequence (number/letter) in enumerator
result = newRstNode(rnEnumList)
let col = currentTok(p).col
var w = 0
while w <= 2:
while w < wildcards.len:
if match(p, p.idx, wildcards[w]): break
inc w
if w <= 2:
var col = currentTok(p).col
result = newRstNode(rnEnumList)
inc p.idx, wildpos[w] + 3
var j = tokenAfterNewline(p)
if p.tok[j].col == currentTok(p).col or match(p, j, wildcards[w]):
pushInd(p, currentTok(p).col)
while true:
var item = newRstNode(rnEnumItem)
parseSection(p, item)
result.add(item)
if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
match(p, p.idx + 1, wildcards[w]):
inc p.idx, wildpos[w] + 4
else:
break
popInd(p)
assert w < wildcards.len
for i in 0 ..< wildToken[w]-1: # add first enumerator with (, ), and .
if p.tok[p.idx + i].symbol == "#":
result.text.add "1"
else:
dec p.idx, wildpos[w] + 3
result = nil
result.text.add p.tok[p.idx + i].symbol
var prevEnum = p.tok[p.idx + wildIndex[w]].symbol
inc p.idx, wildToken[w]
while true:
var item = newRstNode(rnEnumItem)
pushInd(p, currentTok(p).col)
parseSection(p, item)
popInd(p)
result.add(item)
if currentTok(p).kind == tkIndent and currentTok(p).ival == col and
match(p, p.idx+1, wildcards[w]):
let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
# check that it's in sequence: enumerator == next(prevEnum)
if "n" in wildcards[w]: # arabic numeral
let prevEnumI = try: parseInt(prevEnum) except: 1
let curEnum =
if enumerator == "#": prevEnumI + 1
else: (try: parseInt(enumerator) except: 1)
if curEnum - prevEnumI != 1:
break
prevEnum = enumerator
else: # a..z
let prevEnumI = ord(prevEnum[0])
let curEnum =
if enumerator == "#": prevEnumI + 1
else: ord(enumerator[0])
if curEnum - prevEnumI != 1:
break
prevEnum = $chr(curEnum)
inc p.idx, 1 + wildToken[w]
else:
break
proc sonKind(father: PRstNode, i: int): RstNodeKind =
result = rnLeaf
@@ -1511,6 +1592,8 @@ proc parseSection(p: var RstParser, result: PRstNode) =
result.add(a)
popInd(p)
else:
while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
inc p.idx # skip blank lines
leave = true
break
if leave or currentTok(p).kind == tkEof: break

View File

@@ -69,7 +69,7 @@ type
RstNode* {.acyclic, final.} = object ## an RST node's description
kind*: RstNodeKind ## the node's kind
text*: string ## valid for leafs in the AST; and the title of
## the document or the section
## the document or the section; and rnEnumList
level*: int ## valid for some node kinds
sons*: RstNodeSeq ## the node's sons

View File

@@ -1029,6 +1029,56 @@ proc renderField(d: PDoc, n: PRstNode, result: var string) =
if not b:
renderAux(d, n, "<tr>$1</tr>\n", "$1", result)
proc renderEnumList(d: PDoc, n: PRstNode, result: var string) =
  ## Renders the enumerated list node `n` into `result`.
  ##
  ## `n.text` is expected to hold the first enumerator of the list together
  ## with its surrounding punctuation, e.g. ``3.``, ``b)`` or ``(A)``. Its
  ## first significant character selects the numbering style (arabic numerals,
  ## lower-case or upper-case letters); when the list does not begin with
  ## ``1`` / ``a`` / ``A`` an explicit start value is emitted as well.
  ##
  ## A node whose `text` contains no enumerator character (for instance an
  ## empty `text`) is rendered as a plain list starting at 1 instead of
  ## raising an index error.
  let
    hasPre = n.text.len > 0 and n.text[0] == '('
    hasPost = n.text.len > 0 and n.text[^1] in {')', '.'}
    pre = if hasPre: "(" else: ""
    post = if hasPost: $n.text[^1] else: ""
    i1 = if hasPre: 1 else: 0                            # first enumerator char
    i2 = if hasPost: n.text.len-2 else: n.text.len-1     # last enumerator char
  if i1 > i2:
    # nothing left between the optional '(' and ')'/'.': there is no
    # enumerator to inspect, so use the default list rendering
    renderAux(d, n, "<ol class=\"simple\">$1</ol>\n",
              "\\begin{enumerate}$1\\end{enumerate}\n", result)
    return
  let enumR = i1 .. i2  # enumerator range without surrounding (, ), .
  var
    specifier = ""
    specStart = ""
  if d.target == outLatex:
    # leave the original enumerator in the output as a LaTeX comment
    result.add ("\n%"&n.text&"\n")
    # use enumerate parameters from package enumitem
    if n.text[i1].isDigit:
      var labelDef = ""
      if pre != "" or post != "":
        labelDef = "label=" & pre & "\\arabic*" & post & ","
      if n.text[enumR] != "1":  # start=1 is the default, so omit it
        specStart = "start=$1" % [n.text[enumR]]
      if labelDef != "" or specStart != "":
        specifier = "[$1$2]" % [labelDef, specStart]
    else:
      let (first, labelDef) =
        if n.text[i1].isUpperAscii: ('A', "label=" & pre & "\\Alph*" & post)
        else: ('a', "label=" & pre & "\\alph*" & post)
      if n.text[i1] != first:
        # convert the letter into its 1-based position in the alphabet
        specStart = ",start=" & $(ord(n.text[i1]) - ord(first) + 1)
      specifier = "[$1$2]" % [labelDef, specStart]
  else:  # HTML
    # TODO: implement enumerator formatting using pre and post ( and ) for HTML
    if n.text[i1].isDigit:
      if n.text[enumR] != "1":  # start="1" is the default, so omit it
        specStart = " start=\"$1\"" % [n.text[enumR]]
      specifier = "class=\"simple\"" & specStart
    else:
      let (first, labelDef) =
        if n.text[i1].isUpperAscii: ('A', "class=\"upperalpha simple\"")
        else: ('a', "class=\"loweralpha simple\"")
      if n.text[i1] != first:
        # convert the letter into its 1-based position in the alphabet
        specStart = " start=\"$1\"" % [ $(ord(n.text[i1]) - ord(first) + 1) ]
      specifier = labelDef & specStart
  renderAux(d, n, "<ol " & specifier & ">$1</ol>\n",
            "\\begin{enumerate}" & specifier & "$1\\end{enumerate}\n",
            result)
proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
if n == nil: return
case n.kind
@@ -1042,9 +1092,7 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
"\\begin{itemize}$1\\end{itemize}\n", result)
of rnBulletItem, rnEnumItem:
renderAux(d, n, "<li>$1</li>\n", "\\item $1\n", result)
of rnEnumList:
renderAux(d, n, "<ol class=\"simple\">$1</ol>\n",
"\\begin{enumerate}$1\\end{enumerate}\n", result)
of rnEnumList: renderEnumList(d, n, result)
of rnDefList:
renderAux(d, n, "<dl class=\"docutils\">$1</dl>\n",
"\\begin{description}$1\\end{description}\n", result)