RST: improve simple tables (#19859)

* RST: improve simple tables

* nim 1.0 gotchas

* Still allow legacy boundaries like `----`
This commit is contained in:
Andrey Makarov
2022-06-04 08:03:03 +03:00
committed by GitHub
parent f7a13f62d6
commit 4341b06f65
16 changed files with 487 additions and 120 deletions

View File

@@ -255,6 +255,7 @@ type
meExpected = "'$1' expected",
meGridTableNotImplemented = "grid table is not implemented",
meMarkdownIllformedTable = "illformed delimiter row of a Markdown table",
meIllformedTable = "Illformed table: $1",
meNewSectionExpected = "new section expected $1",
meGeneralParseError = "general parse error",
meInvalidDirective = "invalid directive: '$1'",
@@ -2467,81 +2468,175 @@ proc parseOverline(p: var RstParser): PRstNode =
anchorType=headlineAnchor)
type
IntSeq = seq[int]
ColumnLimits = tuple
ColSpec = object
start, stop: int
RstCols = seq[ColSpec]
ColumnLimits = tuple # for Markdown
first, last: int
ColSeq = seq[ColumnLimits]
proc tokEnd(p: RstParser): int =
result = currentTok(p).col + currentTok(p).symbol.len - 1
proc tokStart(p: RstParser, idx: int): int =
  ## Returns the `col` field of the token stored at index `idx` of `p.tok`,
  ## i.e. the position where that token begins.
  p.tok[idx].col
proc getColumns(p: var RstParser, cols: var IntSeq) =
proc tokStart(p: RstParser): int =
  ## Same as `tokStart(p, idx)` but for the token at the parser's current
  ## index `p.idx`.
  p.tokStart(p.idx)
proc tokEnd(p: RstParser, idx: int): int =
  ## Returns the position of the last character of the token stored at index
  ## `idx` of `p.tok`: its `col` plus the length of its symbol, minus one.
  pred(p.tok[idx].col + len(p.tok[idx].symbol))
proc tokEnd(p: RstParser): int =
  ## Same as `tokEnd(p, idx)` but for the token at the parser's current
  ## index `p.idx`.
  p.tokEnd(p.idx)
proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int =
# Fills table column specification (or separator) `cols` and returns
# the next parser index after it.
var L = 0
result = startIdx
while true:
inc L
setLen(cols, L)
cols[L - 1] = tokEnd(p)
assert(currentTok(p).kind == tkAdornment)
inc p.idx
if currentTok(p).kind != tkWhite: break
inc p.idx
if currentTok(p).kind != tkAdornment: break
if currentTok(p).kind == tkIndent: inc p.idx
# last column has no limit:
cols[L - 1] = 32000
cols[L - 1].start = tokStart(p, result)
cols[L - 1].stop = tokEnd(p, result)
assert(p.tok[result].kind == tkAdornment)
inc result
if p.tok[result].kind != tkWhite: break
inc result
if p.tok[result].kind != tkAdornment: break
if p.tok[result].kind == tkIndent: inc result
proc checkColumns(p: RstParser, cols: RstCols) =
  ## Checks that the adornment line starting at the current token (`p.idx`)
  ## agrees with the column specification `cols`.
  ##
  ## * Emits an `mwRstStyle` message when the line does not start with `=`.
  ## * Emits `meIllformedTable` when an adornment token does not end at the
  ##   `stop` position of its column, when something other than white space
  ##   separates two columns, or when anything but a line break / end of
  ##   input follows the last column.
  ##
  ## The parser position itself is not modified; a local index walks the
  ## tokens of the line.
  var i = p.idx
  if p.tok[i].symbol[0] != '=':
    rstMessage(p, mwRstStyle,
               "only tables with `=` columns specification are allowed")
  for col in 0 ..< cols.len:
    if tokEnd(p, i) != cols[col].stop:
      rstMessage(p, meIllformedTable,
                 "end of table column #$1 should end at position $2" % [
                   $(col+1), $(cols[col].stop+ColRstOffset)],
                 p.tok[i].line, tokEnd(p, i))
    inc i
    if col == cols.len - 1:
      # After the last column only optional white space may precede the
      # end of the line.
      if p.tok[i].kind == tkWhite:
        inc i
      if p.tok[i].kind notin {tkIndent, tkEof}:
        rstMessage(p, meIllformedTable, "extraneous column specification")
    elif p.tok[i].kind == tkWhite:
      # White space separating this column from the next one.
      inc i
    else:
      # The line ended (or continued with something else) before all
      # columns of `cols` were seen.
      rstMessage(p, meIllformedTable, "no enough table columns",
                 p.tok[i].line, p.tok[i].col)
proc getSpans(p: RstParser, nextLine: int,
              cols: RstCols, unitedCols: RstCols): seq[int] =
  ## Calculates how many columns a joined cell occupies.
  ##
  ## `cols` is the main column specification of the table, `unitedCols` the
  ## specification read from a spanning underline. When `unitedCols` is
  ## empty the result is empty as well. Otherwise the result has one entry
  ## per element of `unitedCols`: the number of consecutive `cols` entries
  ## up to the one whose `stop` equals the `stop` of that united column.
  ## Entries that are never matched keep their initial value 0.
  ##
  ## An `meIllformedTable` message located at token `nextLine` is reported
  ## when a main column ends past the united column being matched, or when
  ## main columns are left over after every united column was matched.
  if unitedCols.len > 0:
    result = newSeq[int](unitedCols.len)
    var
      iCell = 0  # first main column of the span being collected
      jCell = 0  # main column currently examined
      uCell = 0  # united column currently being matched
    while jCell < cols.len:
      if uCell >= unitedCols.len or
          cols[jCell].stop > unitedCols[uCell].stop:
        # The first test keeps `unitedCols[uCell]` in bounds when the
        # underline describes fewer columns than the table has.
        rstMessage(p, meIllformedTable,
                   "spanning underline does not match main table columns",
                   p.tok[nextLine].line, p.tok[nextLine].col)
        # No index advances on this path, so leave the loop explicitly
        # instead of repeating the same comparison if `rstMessage` returns.
        break
      elif cols[jCell].stop < unitedCols[uCell].stop:
        inc jCell
      else:
        # Right edges coincide: columns iCell..jCell form one joined cell.
        result[uCell] = jCell - iCell + 1
        iCell = jCell + 1
        jCell = jCell + 1
        inc uCell
proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNode =
## Parses 1 row in RST simple table.
##
## Returns an `rnTableRow` node holding one `rnTableDataCell` per cell.
## `cols` is the main column specification of the table. `colChar` is
## compared with the first character of an adornment line that follows
## the row, to tell a table boundary from a spanning underline.
## When an underline of `-` unites columns, the cells follow the united
## specification and each cell's `span` is taken from `getSpans`;
## otherwise `span` is 0.
## On return `p.idx` is moved to the index stored in `afterSpan` if that
## is ahead of the position reached while reading the row.
# Consider that columns may be spanning (united by using underline like ----):
# `nextLine` is presumably the index of the first token of the following
# line (as the name `tokenAfterNewline` suggests) -- TODO confirm.
let nextLine = tokenAfterNewline(p)
var unitedCols: RstCols
var afterSpan: int
if p.tok[nextLine].kind == tkAdornment and p.tok[nextLine].symbol[0] == '-':
# Read the underline as a column specification; `afterSpan` is the parser
# index returned by `getColumns` for the position after it.
afterSpan = getColumns(p, unitedCols, nextLine)
if unitedCols == cols and p.tok[nextLine].symbol[0] == colChar:
# legacy rst.nim compat.: allow punctuation like `----` in main boundaries
afterSpan = nextLine
unitedCols.setLen 0
else:
afterSpan = nextLine
# `colEnd`/`colStart` give the limits of cell `i`, taken from `unitedCols`
# when it is non-empty and from `cols` otherwise.
# NOTE(review): the "no limit" case is keyed on `cols.len - 1` even when
# `unitedCols` (which may have fewer entries) is in effect -- confirm that
# this is intended.
template colEnd(i): int =
if i == cols.len - 1: high(int) # last column has no limit
elif unitedCols.len > 0: unitedCols[i].stop else: cols[i].stop
template colStart(i): int =
if unitedCols.len > 0: unitedCols[i].start else: cols[i].start
# `row[k]` accumulates the raw text of cell `k`.
var row = newSeq[string](if unitedCols.len > 0: unitedCols.len else: cols.len)
var spans: seq[int] = getSpans(p, nextLine, cols, unitedCols)
# Line of the first token of the row; used below to position the
# sub-parsers that parse the cell contents.
let line = currentTok(p).line
# Iterate over the lines a single cell may span:
while true:
var nCell = 0
# distribute tokens between cells in the current line:
while currentTok(p).kind notin {tkIndent, tkEof}:
if tokEnd(p) <= colEnd(nCell):
if tokStart(p) < colStart(nCell):
# The token starts before the current cell: white space is dropped,
# anything else is reported.
if currentTok(p).kind != tkWhite:
rstMessage(p, meIllformedTable,
"this word crosses table column from the left")
else:
inc p.idx
else:
# The token lies within the current cell: append it to the cell text.
row[nCell].add(currentTok(p).symbol)
inc p.idx
else:
# The token ends beyond the current cell: report it if it is a
# non-white token that starts before the cell's end, then move on to
# the next cell without consuming the token.
if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite:
rstMessage(p, meIllformedTable,
"this word crosses table column from the right")
inc nCell
if currentTok(p).kind == tkIndent: inc p.idx
# A token ending within the first column means the row is finished.
if tokEnd(p) <= colEnd(0): break
# Otherwise the 1st column is empty on this line, so the line continues
# the current cells -- unless it is the end of input or an adornment.
if currentTok(p).kind in {tkEof, tkAdornment}:
break
# Separate the continuation line from the text gathered so far in every
# cell except the first one.
for nCell in countup(1, high(row)): row[nCell].add('\n')
result = newRstNode(rnTableRow)
# Parse the text of each cell with a fresh parser `q` that shares `p.s`.
# `q.col`/`q.line` are set from the cell start and the row's first line,
# presumably so that positions inside the cell refer to the original
# document -- TODO confirm against `initParser`/`rstMessage`.
var q: RstParser
for uCell in 0 ..< row.len:
initParser(q, p.s)
q.col = colStart(uCell)
q.line = line - 1
getTokens(row[uCell], q.tok)
let cell = newRstNode(rnTableDataCell)
# `spans` is empty when no columns are united; 0 is stored in that case.
cell.span = if spans.len == 0: 0 else: spans[uCell]
cell.add(parseDoc(q))
result.add(cell)
# Skip to the index recorded in `afterSpan` when it is ahead of `p.idx`.
if afterSpan > p.idx:
p.idx = afterSpan
proc parseSimpleTable(p: var RstParser): PRstNode =
var
cols: IntSeq
row: seq[string]
i, last, line: int
c: char
q: RstParser
a, b: PRstNode
var cols: RstCols
result = newRstNodeA(p, rnTable)
cols = @[]
row = @[]
a = nil
c = currentTok(p).symbol[0]
let startIdx = getColumns(p, cols, p.idx)
let colChar = currentTok(p).symbol[0]
checkColumns(p, cols)
p.idx = startIdx
result.colCount = cols.len
while true:
if currentTok(p).kind == tkAdornment:
last = tokenAfterNewline(p)
if p.tok[last].kind in {tkEof, tkIndent}:
checkColumns(p, cols)
p.idx = tokenAfterNewline(p)
if currentTok(p).kind in {tkEof, tkIndent}:
# skip last adornment line:
p.idx = last
break
getColumns(p, cols)
setLen(row, cols.len)
if a != nil:
for j in 0 ..< a.len: # fix rnTableDataCell -> rnTableHeaderCell
a.sons[j] = newRstNode(rnTableHeaderCell, a.sons[j].sons)
if result.sons.len > 0: result.sons[^1].endsHeader = true
# fix rnTableDataCell -> rnTableHeaderCell for previous table rows:
for nRow in 0 ..< result.sons.len:
for nCell in 0 ..< result.sons[nRow].len:
result.sons[nRow].sons[nCell].kind = rnTableHeaderCell
if currentTok(p).kind == tkEof: break
for j in countup(0, high(row)): row[j] = ""
# the following while loop iterates over the lines a single cell may span:
line = currentTok(p).line
while true:
i = 0
while currentTok(p).kind notin {tkIndent, tkEof}:
if tokEnd(p) <= cols[i]:
row[i].add(currentTok(p).symbol)
inc p.idx
else:
if currentTok(p).kind == tkWhite: inc p.idx
inc i
if currentTok(p).kind == tkIndent: inc p.idx
if tokEnd(p) <= cols[0]: break
if currentTok(p).kind in {tkEof, tkAdornment}: break
for j in countup(1, high(row)): row[j].add('\n')
a = newRstNode(rnTableRow)
for j in countup(0, high(row)):
initParser(q, p.s)
q.col = cols[j]
q.line = line - 1
getTokens(row[j], q.tok)
b = newRstNode(rnTableDataCell)
b.add(parseDoc(q))
a.add(b)
result.add(a)
let tabRow = parseSimpleTableRow(p, cols, colChar)
result.add tabRow
proc readTableRow(p: var RstParser): ColSeq =
if currentTok(p).symbol == "|": inc p.idx
@@ -2574,17 +2669,16 @@ proc isValidDelimiterRow(p: var RstParser, colNum: int): bool =
proc parseMarkdownTable(p: var RstParser): PRstNode =
var
row: ColSeq
colNum: int
a, b: PRstNode
q: RstParser
result = newRstNodeA(p, rnMarkdownTable)
proc parseRow(p: var RstParser, cellKind: RstNodeKind, result: PRstNode) =
row = readTableRow(p)
if colNum == 0: colNum = row.len # table header
elif row.len < colNum: row.setLen(colNum)
if result.colCount == 0: result.colCount = row.len # table header
elif row.len < result.colCount: row.setLen(result.colCount)
a = newRstNode(rnTableRow)
for j in 0 ..< colNum:
for j in 0 ..< result.colCount:
b = newRstNode(cellKind)
initParser(q, p.s)
q.col = p.col
@@ -2595,7 +2689,8 @@ proc parseMarkdownTable(p: var RstParser): PRstNode =
result.add(a)
parseRow(p, rnTableHeaderCell, result)
if not isValidDelimiterRow(p, colNum): rstMessage(p, meMarkdownIllformedTable)
if not isValidDelimiterRow(p, result.colCount):
rstMessage(p, meMarkdownIllformedTable)
while predNL(p) and currentTok(p).symbol == "|":
parseRow(p, rnTableDataCell, result)

View File

@@ -112,6 +112,12 @@ type
## nodes that are post-processed after parsing
of rnNimdocRef:
tooltip*: string
of rnTable, rnGridTable, rnMarkdownTable:
colCount*: int ## Number of (not-united) cells in the table
of rnTableRow:
endsHeader*: bool ## Is last row in the header of table?
of rnTableHeaderCell, rnTableDataCell:
span*: int ## Number of table columns that the cell occupies
else:
discard
anchor*: string ## anchor, internal link target
@@ -416,6 +422,13 @@ proc treeRepr*(node: PRstNode, indent=0): string =
result.add (if node.order == 0: "" else: " order=" & $node.order)
of rnMarkdownBlockQuoteItem:
result.add " quotationDepth=" & $node.quotationDepth
of rnTable, rnGridTable, rnMarkdownTable:
result.add " colCount=" & $node.colCount
of rnTableHeaderCell, rnTableDataCell:
if node.span > 0:
result.add " span=" & $node.span
of rnTableRow:
if node.endsHeader: result.add " endsHeader"
else:
discard
result.add (if node.anchor == "": "" else: " anchor='" & node.anchor & "'")

View File

@@ -1091,10 +1091,6 @@ proc renderContainer(d: PDoc, n: PRstNode, result: var string) =
else:
dispA(d.target, result, "<div class=\"$1\">$2</div>", "$2", [arg, tmp])
proc texColumns(n: PRstNode): string =
let nColumns = if n.sons.len > 0: len(n.sons[0]) else: 1
result = "L".repeat(nColumns)
proc renderField(d: PDoc, n: PRstNode, result: var string) =
var b = false
if d.target == outLatex:
@@ -1323,24 +1319,49 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
renderAux(d, n,
"<table$2 border=\"1\" class=\"docutils\">$1</table>",
"\n$2\n\\begin{rsttab}{" &
texColumns(n) & "}\n\\hline\n$1\\end{rsttab}", result)
"L".repeat(n.colCount) & "}\n\\toprule\n$1" &
"\\addlinespace[0.1em]\\bottomrule\n\\end{rsttab}", result)
of rnTableRow:
if len(n) >= 1:
if d.target == outLatex:
#var tmp = ""
renderRstToOut(d, n.sons[0], result)
for i in countup(1, len(n) - 1):
result.add(" & ")
renderRstToOut(d, n.sons[i], result)
result.add("\\\\\n\\hline\n")
else:
case d.target
of outHtml:
result.add("<tr>")
renderAux(d, n, result)
result.add("</tr>\n")
of rnTableDataCell:
renderAux(d, n, "<td>$1</td>", "$1", result)
of rnTableHeaderCell:
renderAux(d, n, "<th>$1</th>", "\\textbf{$1}", result)
of outLatex:
if n.sons[0].kind == rnTableHeaderCell:
result.add "\\rowcolor{gray!15} "
var spanLines: seq[(int, int)]
var nCell = 0
for uCell in 0 .. n.len - 1:
renderRstToOut(d, n.sons[uCell], result)
if n.sons[uCell].span > 0:
spanLines.add (nCell + 1, nCell + n.sons[uCell].span)
nCell += n.sons[uCell].span
else:
nCell += 1
if uCell != n.len - 1:
result.add(" & ")
result.add("\\\\")
if n.endsHeader: result.add("\\midrule\n")
for (start, stop) in spanLines:
result.add("\\cmidrule(lr){$1-$2}" % [$start, $stop])
result.add("\n")
of rnTableHeaderCell, rnTableDataCell:
case d.target
of outHtml:
let tag = if n.kind == rnTableHeaderCell: "th" else: "td"
var spanSpec: string
if n.span <= 1: spanSpec = ""
else:
spanSpec = " colspan=\"" & $n.span & "\" style=\"text-align: center\""
renderAux(d, n, "<$1$2>$$1</$1>" % [tag, spanSpec], "", result)
of outLatex:
let text = if n.kind == rnTableHeaderCell: "\\textbf{$1}" else: "$1"
var latexStr: string
if n.span <= 1: latexStr = text
else: latexStr = "\\multicolumn{" & $n.span & "}{c}{" & text & "}"
renderAux(d, n, "", latexStr, result)
of rnFootnoteGroup:
renderAux(d, n,
"<hr class=\"footnote\">" &

View File

@@ -8,11 +8,11 @@
#
## Nim coroutines implementation, supports several context switching methods:
## -------- ------------
## ======== ============
## ucontext available on unix and alike (default)
## setjmp available on unix and alike (x86/64 only)
## fibers available and required on windows.
## -------- ------------
## ======== ============
##
## -d:nimCoroutines Required to build this module.
## -d:nimCoroutinesUcontext Use ucontext backend.

View File

@@ -2383,21 +2383,21 @@ iterator walkDirRec*(dir: string,
##
## Walking is recursive. `followFilter` controls the behaviour of the iterator:
##
## --------------------- ---------------------------------------------
## ===================== =============================================
## yieldFilter meaning
## --------------------- ---------------------------------------------
## ===================== =============================================
## ``pcFile`` yield real files (default)
## ``pcLinkToFile`` yield symbolic links to files
## ``pcDir`` yield real directories
## ``pcLinkToDir`` yield symbolic links to directories
## --------------------- ---------------------------------------------
## ===================== =============================================
##
## --------------------- ---------------------------------------------
## ===================== =============================================
## followFilter meaning
## --------------------- ---------------------------------------------
## ===================== =============================================
## ``pcDir`` follow real directories (default)
## ``pcLinkToDir`` follow symbolic links to directories
## --------------------- ---------------------------------------------
## ===================== =============================================
##
##
## See also:

View File

@@ -552,15 +552,15 @@ proc escape*(s: string): string =
##
## Escapes these characters:
##
## ------------ -------------------
## ============ ===================
## char is converted to
## ------------ -------------------
## ============ ===================
## ``<`` ``&lt;``
## ``>`` ``&gt;``
## ``&`` ``&amp;``
## ``"`` ``&quot;``
## ``'`` ``&apos;``
## ------------ -------------------
## ============ ===================
##
## You can also use `addEscaped proc <#addEscaped,string,string>`_.
result = newStringOfCap(s.len)