RST: improve simple tables (#19859)

* RST: improve simple tables * nim 1.0 gotchas * Still allow legacy boundaries like `----`
2026-04-25 16:53:59 +00:00 · 2022-06-04 08:03:03 +03:00
parent f7a13f62d6
commit 4341b06f65
16 changed files with 487 additions and 120 deletions
--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -255,6 +255,7 @@ type
    meExpected = "'$1' expected",
    meGridTableNotImplemented = "grid table is not implemented",
    meMarkdownIllformedTable = "illformed delimiter row of a Markdown table",
+    meIllformedTable = "Illformed table: $1",
    meNewSectionExpected = "new section expected $1",
    meGeneralParseError = "general parse error",
    meInvalidDirective = "invalid directive: '$1'",
@@ -2467,81 +2468,175 @@ proc parseOverline(p: var RstParser): PRstNode =
               anchorType=headlineAnchor)

 type
-  IntSeq = seq[int]
-  ColumnLimits = tuple
+  ColSpec = object  # position of one column of an RST simple table
+    start, stop: int  # first and last text column of it, both inclusive
+  RstCols = seq[ColSpec]  # all columns found in one border/underline line
+  ColumnLimits = tuple  # for Markdown
    first, last: int
  ColSeq = seq[ColumnLimits]

-proc tokEnd(p: RstParser): int =
-  result = currentTok(p).col + currentTok(p).symbol.len - 1
+proc tokStart(p: RstParser, idx: int): int =
+  ## Column at which the token with index `idx` begins.
+  p.tok[idx].col

-proc getColumns(p: var RstParser, cols: var IntSeq) =
+proc tokStart(p: RstParser): int =
+  ## Column at which the token at the parser's current index begins.
+  p.tokStart(p.idx)
+
+proc tokEnd(p: RstParser, idx: int): int =
+  ## Inclusive end column of the token with index `idx`:
+  ## its start column plus the length of its symbol, minus one.
+  p.tok[idx].col + len(p.tok[idx].symbol) - 1
+
+proc tokEnd(p: RstParser): int =
+  ## Inclusive end column of the token at the parser's current index.
+  p.tokEnd(p.idx)
+
+proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int =
+  # Fills table column specification (or separator) `cols`, starting from the
+  # token at `startIdx`, and returns the next parser index after it.
  var L = 0
+  result = startIdx  # scanning cursor; `p` itself is not advanced here
  while true:
    inc L
    setLen(cols, L)
-    cols[L - 1] = tokEnd(p)
-    assert(currentTok(p).kind == tkAdornment)
-    inc p.idx
-    if currentTok(p).kind != tkWhite: break
-    inc p.idx
-    if currentTok(p).kind != tkAdornment: break
-  if currentTok(p).kind == tkIndent: inc p.idx
-  # last column has no limit:
-  cols[L - 1] = 32000
+    cols[L - 1].start = tokStart(p, result)  # first text column of the run
+    cols[L - 1].stop = tokEnd(p, result)  # last text column (inclusive)
+    assert(p.tok[result].kind == tkAdornment)
+    inc result
+    if p.tok[result].kind != tkWhite: break  # no gap => no further column
+    inc result
+    if p.tok[result].kind != tkAdornment: break  # gap not followed by a run
+  if p.tok[result].kind == tkIndent: inc result  # also skip a tkIndent token
+
+proc checkColumns(p: RstParser, cols: RstCols) =
+  ## Validates the adornment line at the current parser position against the
+  ## column specification `cols`.
+  ##
+  ## * emits `mwRstStyle` when the first token does not start with `=`;
+  ## * emits `meIllformedTable` when a token does not end at the `stop` of its
+  ##   column, when a column is missing, or when something other than
+  ##   `tkIndent`/`tkEof` follows the last column.
+  ##
+  ## Only a local cursor is moved; `p` is not modified.
+  var i = p.idx  # local cursor over the tokens of the adornment line
+  if p.tok[i].symbol[0] != '=':
+    rstMessage(p, mwRstStyle,
+               "only tables with `=` columns specification are allowed")
+  for col in 0 ..< cols.len:
+    if tokEnd(p, i) != cols[col].stop:
+      rstMessage(p, meIllformedTable,
+                 "end of table column #$1 should end at position $2" % [
+                   $(col+1), $(cols[col].stop+ColRstOffset)],
+                 p.tok[i].line, tokEnd(p, i))
+    inc i
+    if col == cols.len - 1:
+      # after the last column only optional whitespace may remain on the line
+      if p.tok[i].kind == tkWhite:
+        inc i
+      if p.tok[i].kind notin {tkIndent, tkEof}:
+        rstMessage(p, meIllformedTable, "extraneous column specification")
+    elif p.tok[i].kind == tkWhite:
+      # gap between two columns
+      inc i
+    else:
+      rstMessage(p, meIllformedTable, "no enough table columns",
+                 p.tok[i].line, p.tok[i].col)
+
+proc getSpans(p: RstParser, nextLine: int,
+              cols: RstCols, unitedCols: RstCols): seq[int] =
+  ## Calculates how many columns a joined cell occupies: `result[u]` is the
+  ## number of `cols` entries under `unitedCols[u]` (empty if none united).
+  ## Reports `meIllformedTable` when the underline does not fit `cols`.
+  if unitedCols.len > 0:
+    result = newSeq[int](unitedCols.len)
+    var
+      iCell = 0  # first main column belonging to the current united cell
+      uCell = 0  # united cell currently being matched
+    for jCell in 0 ..< cols.len:
+      # Underline ran out of cells or its edge falls inside a main column:
+      if uCell >= unitedCols.len or cols[jCell].stop > unitedCols[uCell].stop:
+        rstMessage(p, meIllformedTable,
+                   "spanning underline does not match main table columns",
+                   p.tok[nextLine].line, p.tok[nextLine].col)
+        break  # do not spin or over-index if the message handler returns
+      if cols[jCell].stop == unitedCols[uCell].stop:
+        result[uCell] = jCell - iCell + 1
+        iCell = jCell + 1
+        inc uCell
+
+proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNode =
+  ## Parses 1 row in RST simple table.
+  ##
+  ## `cols` is the column specification of the table and `colChar` the
+  ## character of its top border. When the line after the row's first line
+  ## is a `-` underline, its runs define the (possibly united) cells of this
+  ## row and each cell's `span` is taken from `getSpans`; otherwise cells
+  ## follow `cols` and `span` stays 0.
+  ## Returns an `rnTableRow` whose sons are `rnTableDataCell` nodes and leaves
+  ## the parser after the row (and after its spanning underline, if any).
+  # Consider that columns may be spanning (united by using underline like ----):
+  let nextLine = tokenAfterNewline(p)
+  var unitedCols: RstCols
+  var afterSpan: int
+  if p.tok[nextLine].kind == tkAdornment and p.tok[nextLine].symbol[0] == '-':
+    afterSpan = getColumns(p, unitedCols, nextLine)
+    if unitedCols == cols and p.tok[nextLine].symbol[0] == colChar:
+      # legacy rst.nim compat.: allow punctuation like `----` in main boundaries
+      afterSpan = nextLine
+      unitedCols.setLen 0
+  else:
+    afterSpan = nextLine
+  # Number of cells in *this* row: united cells if present, else table columns.
+  let nCells = if unitedCols.len > 0: unitedCols.len else: cols.len
+  template colEnd(i): int =
+    # The last cell of the row has no right limit. This must be tested against
+    # the row's own cell count: with united cells there are fewer cells than
+    # `cols.len`, and testing `cols.len - 1` would let `nCell` run past the
+    # row and index `unitedCols`/`row` out of range.
+    if i == nCells - 1: high(int)
+    elif unitedCols.len > 0: unitedCols[i].stop else: cols[i].stop
+  template colStart(i): int =
+    if unitedCols.len > 0: unitedCols[i].start else: cols[i].start
+  var row = newSeq[string](nCells)
+  let spans: seq[int] = getSpans(p, nextLine, cols, unitedCols)
+
+  let line = currentTok(p).line
+  # Iterate over the lines a single cell may span:
+  while true:
+    var nCell = 0
+    # distribute tokens between cells in the current line:
+    while currentTok(p).kind notin {tkIndent, tkEof}:
+      if tokEnd(p) <= colEnd(nCell):
+        if tokStart(p) < colStart(nCell):
+          # token lies (partly) in the gap before the cell: only whitespace
+          # is allowed there
+          if currentTok(p).kind != tkWhite:
+            rstMessage(p, meIllformedTable,
+                       "this word crosses table column from the left")
+          # Always advance: whitespace is dropped, and an offending word is
+          # skipped so the loop terminates even if the handler returns.
+          inc p.idx
+        else:
+          row[nCell].add(currentTok(p).symbol)
+          inc p.idx
+      else:
+        if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite:
+          rstMessage(p, meIllformedTable,
+                     "this word crosses table column from the right")
+        inc nCell
+    if currentTok(p).kind == tkIndent: inc p.idx
+    if tokEnd(p) <= colEnd(0): break
+    # Continued current cells because the 1st column is empty.
+    if currentTok(p).kind in {tkEof, tkAdornment}:
+      break
+    for nCell in countup(1, high(row)): row[nCell].add('\n')
+  result = newRstNode(rnTableRow)
+  var q: RstParser
+  # Parse the collected text of every cell with a fresh sub-parser:
+  for uCell in 0 ..< row.len:
+    initParser(q, p.s)
+    q.col = colStart(uCell)
+    q.line = line - 1
+    getTokens(row[uCell], q.tok)
+    let cell = newRstNode(rnTableDataCell)
+    cell.span = if spans.len == 0: 0 else: spans[uCell]
+    cell.add(parseDoc(q))
+    result.add(cell)
+  # Step over the spanning underline, unless the parser is already past it:
+  if afterSpan > p.idx:
+    p.idx = afterSpan

 proc parseSimpleTable(p: var RstParser): PRstNode =
-  var
-    cols: IntSeq
-    row: seq[string]
-    i, last, line: int
-    c: char
-    q: RstParser
-    a, b: PRstNode
+  var cols: RstCols  # column specification taken from the top border
  result = newRstNodeA(p, rnTable)
-  cols = @[]
-  row = @[]
-  a = nil
-  c = currentTok(p).symbol[0]
+  let startIdx = getColumns(p, cols, p.idx)  # index just after the top border
+  let colChar = currentTok(p).symbol[0]  # first character of the top border
+  checkColumns(p, cols)  # `p` still points at the top border here
+  p.idx = startIdx  # now continue after the top border
+  result.colCount = cols.len
  while true:
    if currentTok(p).kind == tkAdornment:
-      last = tokenAfterNewline(p)
-      if p.tok[last].kind in {tkEof, tkIndent}:
+      checkColumns(p, cols)  # later borders must agree with the top border
+      p.idx = tokenAfterNewline(p)  # jump to what follows this border line
+      if currentTok(p).kind in {tkEof, tkIndent}:
        # skip last adornment line:
-        p.idx = last
        break
-      getColumns(p, cols)
-      setLen(row, cols.len)
-      if a != nil:
-        for j in 0 ..< a.len:  # fix rnTableDataCell -> rnTableHeaderCell
-          a.sons[j] = newRstNode(rnTableHeaderCell, a.sons[j].sons)
+      if result.sons.len > 0: result.sons[^1].endsHeader = true
+      # fix rnTableDataCell -> rnTableHeaderCell for previous table rows:
+      for nRow in 0 ..< result.sons.len:
+        for nCell in 0 ..< result.sons[nRow].len:
+          result.sons[nRow].sons[nCell].kind = rnTableHeaderCell
    if currentTok(p).kind == tkEof: break
-    for j in countup(0, high(row)): row[j] = ""
-    # the following while loop iterates over the lines a single cell may span:
-    line = currentTok(p).line
-    while true:
-      i = 0
-      while currentTok(p).kind notin {tkIndent, tkEof}:
-        if tokEnd(p) <= cols[i]:
-          row[i].add(currentTok(p).symbol)
-          inc p.idx
-        else:
-          if currentTok(p).kind == tkWhite: inc p.idx
-          inc i
-      if currentTok(p).kind == tkIndent: inc p.idx
-      if tokEnd(p) <= cols[0]: break
-      if currentTok(p).kind in {tkEof, tkAdornment}: break
-      for j in countup(1, high(row)): row[j].add('\n')
-    a = newRstNode(rnTableRow)
-    for j in countup(0, high(row)):
-      initParser(q, p.s)
-      q.col = cols[j]
-      q.line = line - 1
-      getTokens(row[j], q.tok)
-      b = newRstNode(rnTableDataCell)
-      b.add(parseDoc(q))
-      a.add(b)
-    result.add(a)
+    let tabRow = parseSimpleTableRow(p, cols, colChar)  # one (multi-line) row
+    result.add tabRow

 proc readTableRow(p: var RstParser): ColSeq =
  if currentTok(p).symbol == "|": inc p.idx
@@ -2574,17 +2669,16 @@ proc isValidDelimiterRow(p: var RstParser, colNum: int): bool =
 proc parseMarkdownTable(p: var RstParser): PRstNode =
  var
    row: ColSeq
-    colNum: int
    a, b: PRstNode
    q: RstParser
  result = newRstNodeA(p, rnMarkdownTable)

  proc parseRow(p: var RstParser, cellKind: RstNodeKind, result: PRstNode) =
    row = readTableRow(p)
-    if colNum == 0: colNum = row.len # table header
-    elif row.len < colNum: row.setLen(colNum)
+    if result.colCount == 0: result.colCount = row.len # table header
+    elif row.len < result.colCount: row.setLen(result.colCount)
    a = newRstNode(rnTableRow)
-    for j in 0 ..< colNum:
+    for j in 0 ..< result.colCount:
      b = newRstNode(cellKind)
      initParser(q, p.s)
      q.col = p.col
@@ -2595,7 +2689,8 @@ proc parseMarkdownTable(p: var RstParser): PRstNode =
    result.add(a)

  parseRow(p, rnTableHeaderCell, result)
-  if not isValidDelimiterRow(p, colNum): rstMessage(p, meMarkdownIllformedTable)
+  if not isValidDelimiterRow(p, result.colCount):
+    rstMessage(p, meMarkdownIllformedTable)
  while predNL(p) and currentTok(p).symbol == "|":
    parseRow(p, rnTableDataCell, result)

--- a/lib/packages/docutils/rstast.nim
+++ b/lib/packages/docutils/rstast.nim
@@ -112,6 +112,12 @@ type
                              ## nodes that are post-processed after parsing
    of rnNimdocRef:
      tooltip*: string
+    of rnTable, rnGridTable, rnMarkdownTable:
+      colCount*: int          ## Number of (not-united) cells in the table
+    of rnTableRow:
+      endsHeader*: bool       ## Is last row in the header of table?
+    of rnTableHeaderCell, rnTableDataCell:
+      span*: int              ## Number of table columns that the cell occupies
    else:
      discard
    anchor*: string           ## anchor, internal link target
@@ -416,6 +422,13 @@ proc treeRepr*(node: PRstNode, indent=0): string =
    result.add (if node.order == 0:   "" else: "  order=" & $node.order)
  of rnMarkdownBlockQuoteItem:
    result.add "  quotationDepth=" & $node.quotationDepth
+  of rnTable, rnGridTable, rnMarkdownTable:
+    result.add "  colCount=" & $node.colCount
+  of rnTableHeaderCell, rnTableDataCell:
+    if node.span > 0:
+      result.add "  span=" & $node.span
+  of rnTableRow:
+    if node.endsHeader: result.add "  endsHeader"
  else:
    discard
  result.add (if node.anchor == "": "" else: "  anchor='" & node.anchor & "'")
--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -1091,10 +1091,6 @@ proc renderContainer(d: PDoc, n: PRstNode, result: var string) =
  else:
    dispA(d.target, result, "<div class=\"$1\">$2</div>", "$2", [arg, tmp])

-proc texColumns(n: PRstNode): string =
-  let nColumns = if n.sons.len > 0: len(n.sons[0]) else: 1
-  result = "L".repeat(nColumns)
-
 proc renderField(d: PDoc, n: PRstNode, result: var string) =
  var b = false
  if d.target == outLatex:
@@ -1323,24 +1319,49 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
    renderAux(d, n,
      "<table$2 border=\"1\" class=\"docutils\">$1</table>",
      "\n$2\n\\begin{rsttab}{" &
-        texColumns(n) & "}\n\\hline\n$1\\end{rsttab}", result)
+        "L".repeat(n.colCount) & "}\n\\toprule\n$1" &
+        "\\addlinespace[0.1em]\\bottomrule\n\\end{rsttab}", result)
  of rnTableRow:
    if len(n) >= 1:
-      if d.target == outLatex:
-        #var tmp = ""
-        renderRstToOut(d, n.sons[0], result)
-        for i in countup(1, len(n) - 1):
-          result.add(" & ")
-          renderRstToOut(d, n.sons[i], result)
-        result.add("\\\\\n\\hline\n")
-      else:
+      case d.target
+      of outHtml:
        result.add("<tr>")
        renderAux(d, n, result)
        result.add("</tr>\n")
-  of rnTableDataCell:
-    renderAux(d, n, "<td>$1</td>", "$1", result)
-  of rnTableHeaderCell:
-    renderAux(d, n, "<th>$1</th>", "\\textbf{$1}", result)
+      of outLatex:
+        if n.sons[0].kind == rnTableHeaderCell:
+          result.add "\\rowcolor{gray!15} "
+        var spanLines: seq[(int, int)]
+        var nCell = 0
+        for uCell in 0 .. n.len - 1:
+          renderRstToOut(d, n.sons[uCell], result)
+          if n.sons[uCell].span > 0:
+            spanLines.add (nCell + 1, nCell + n.sons[uCell].span)
+            nCell += n.sons[uCell].span
+          else:
+            nCell += 1
+          if uCell != n.len - 1:
+            result.add(" & ")
+        result.add("\\\\")
+        if n.endsHeader: result.add("\\midrule\n")
+        for (start, stop) in spanLines:
+          result.add("\\cmidrule(lr){$1-$2}" % [$start, $stop])
+        result.add("\n")
+  of rnTableHeaderCell, rnTableDataCell:
+    case d.target
+    of outHtml:
+      let tag = if n.kind == rnTableHeaderCell: "th" else: "td"
+      var spanSpec: string
+      if n.span <= 1: spanSpec = ""
+      else:
+        spanSpec = " colspan=\"" & $n.span & "\" style=\"text-align: center\""
+      renderAux(d, n, "<$1$2>$$1</$1>" % [tag, spanSpec], "", result)
+    of outLatex:
+      let text = if n.kind == rnTableHeaderCell: "\\textbf{$1}" else: "$1"
+      var latexStr: string
+      if n.span <= 1: latexStr = text
+      else: latexStr = "\\multicolumn{" & $n.span & "}{c}{" & text & "}"
+      renderAux(d, n, "", latexStr, result)
  of rnFootnoteGroup:
    renderAux(d, n,
      "<hr class=\"footnote\">" &
--- a/lib/pure/coro.nim
+++ b/lib/pure/coro.nim
@@ -8,11 +8,11 @@
 #

 ## Nim coroutines implementation, supports several context switching methods:
-## --------  ------------
+## ========  ============
 ## ucontext  available on unix and alike (default)
 ## setjmp    available on unix and alike (x86/64 only)
 ## fibers    available and required on windows.
-## --------  ------------
+## ========  ============
 ##
 ## -d:nimCoroutines               Required to build this module.
 ## -d:nimCoroutinesUcontext       Use ucontext backend.
--- a/lib/pure/os.nim
+++ b/lib/pure/os.nim
@@ -2383,21 +2383,21 @@ iterator walkDirRec*(dir: string,
  ##
  ## Walking is recursive. `followFilter` controls the behaviour of the iterator:
  ##
-  ## ---------------------   ---------------------------------------------
+  ## =====================   =============================================
  ## yieldFilter             meaning
-  ## ---------------------   ---------------------------------------------
+  ## =====================   =============================================
  ## ``pcFile``              yield real files (default)
  ## ``pcLinkToFile``        yield symbolic links to files
  ## ``pcDir``               yield real directories
  ## ``pcLinkToDir``         yield symbolic links to directories
-  ## ---------------------   ---------------------------------------------
+  ## =====================   =============================================
  ##
-  ## ---------------------   ---------------------------------------------
+  ## =====================   =============================================
  ## followFilter            meaning
-  ## ---------------------   ---------------------------------------------
+  ## =====================   =============================================
  ## ``pcDir``               follow real directories (default)
  ## ``pcLinkToDir``         follow symbolic links to directories
-  ## ---------------------   ---------------------------------------------
+  ## =====================   =============================================
  ##
  ##
  ## See also:
--- a/lib/pure/xmltree.nim
+++ b/lib/pure/xmltree.nim
@@ -552,15 +552,15 @@ proc escape*(s: string): string =
  ##
  ## Escapes these characters:
  ##
-  ## ------------    -------------------
+  ## ============    ===================
  ## char            is converted to
-  ## ------------    -------------------
+  ## ============    ===================
  ##  ``<``          ``&lt;``
  ##  ``>``          ``&gt;``
  ##  ``&``          ``&amp;``
  ##  ``"``          ``&quot;``
  ##  ``'``          ``&apos;``
-  ## ------------    -------------------
+  ## ============    ===================
  ##
  ## You can also use `addEscaped proc <#addEscaped,string,string>`_.
  result = newStringOfCap(s.len)