RST tables: fix latex col number; allow less than three of = (#16040)

2025-12-28 17:04:41 +00:00 · 2020-12-04 10:50:17 +03:00
parent e4e5a0c65a
commit 6877e0c8a3
5 changed files with 263 additions and 31 deletions
--- a/doc/nep1.rst
+++ b/doc/nep1.rst
@@ -1,6 +1,6 @@
-==============================================
+==========================================================
 Nim Enhancement Proposal #1 - Standard Library Style Guide
-==============================================
+==========================================================
 :Author: Clay Sweetser, Dominik Picheta
 :Version: |nimversion|

--- a/lib/packages/docutils/rst.nim
+++ b/lib/packages/docutils/rst.nim
@@ -101,7 +101,12 @@ const

 type
  TokType = enum
-    tkEof, tkIndent, tkWhite, tkWord, tkAdornment, tkPunct, tkOther
+    tkEof, tkIndent,
+    tkWhite, tkWord,
+    tkAdornment,              # used for chapter adornment, transitions and
+                              # horizontal table borders
+    tkPunct,                  # one or many punctuation characters
+    tkOther
  Token = object              # a RST token
    kind*: TokType            # the type of the token
    ival*: int                # the indentation or parsed integer value
@@ -114,6 +119,7 @@ type
    bufpos*: int
    line*, col*, baseIndent*: int
    skipPounds*: bool
+    adornmentLine*: bool

 proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
  tok.kind = tkWord
@@ -127,8 +133,26 @@ proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
  inc L.col, pos - L.bufpos
  L.bufpos = pos

-proc getAdornment(L: var Lexer, tok: var Token) =
-  tok.kind = tkAdornment
+proc isCurrentLineAdornment(L: var Lexer): bool =
+  var pos = L.bufpos
+  let c = L.buf[pos]
+  while true:
+    inc pos
+    if L.buf[pos] in {'\c', '\l', '\0'}:
+      break
+    if c == '+':  # grid table
+      if L.buf[pos] notin {'-', '=', '+'}:
+        return false
+    else:  # section adornment or table horizontal border
+      if L.buf[pos] notin {c, ' ', '\t', '\v', '\f'}:
+        return false
+  result = true
+
+proc getPunctAdornment(L: var Lexer, tok: var Token) =
+  if L.adornmentLine:
+    tok.kind = tkAdornment
+  else:
+    tok.kind = tkPunct
  tok.line = L.line
  tok.col = L.col
  var pos = L.bufpos
@@ -139,6 +163,8 @@ proc getAdornment(L: var Lexer, tok: var Token) =
    if L.buf[pos] != c: break
  inc L.col, pos - L.bufpos
  L.bufpos = pos
+  if tok.symbol == "\\": tok.kind = tkPunct
+    # nim extension: standalone \ can not be adornment

 proc getBracket(L: var Lexer, tok: var Token) =
  tok.kind = tkPunct
@@ -189,6 +215,8 @@ proc getIndent(L: var Lexer, tok: var Token) =
 proc rawGetTok(L: var Lexer, tok: var Token) =
  tok.symbol = ""
  tok.ival = 0
+  if L.col == 0:
+    L.adornmentLine = false
  var c = L.buf[L.bufpos]
  case c
  of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '0'..'9':
@@ -200,11 +228,13 @@ proc rawGetTok(L: var Lexer, tok: var Token) =
      rawGetTok(L, tok)       # ignore spaces before \n
  of '\x0D', '\x0A':
    getIndent(L, tok)
+    L.adornmentLine = false
  of '!', '\"', '#', '$', '%', '&', '\'',  '*', '+', ',', '-', '.',
     '/', ':', ';', '<', '=', '>', '?', '@', '\\', '^', '_', '`',
     '|', '~':
-    getAdornment(L, tok)
-    if tok.symbol.len <= 3: tok.kind = tkPunct
+    if L.col == 0:
+      L.adornmentLine = L.isCurrentLineAdornment()
+    getPunctAdornment(L, tok)
  of '(', ')', '[', ']', '{', '}':
    getBracket(L, tok)
  else:
@@ -730,7 +760,7 @@ proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
    of tkEof:
      rstMessage(p, meExpected, "```")
      break
-    of tkPunct:
+    of tkPunct, tkAdornment:
      if currentTok(p).symbol == "```":
        inc p.idx
        break
@@ -822,6 +852,10 @@ proc parseInline(p: var RstParser, father: PRstNode) =
        return
    parseUrl(p, father)
  of tkAdornment, tkOther, tkWhite:
+    if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```":
+      inc p.idx
+      father.add(parseMarkdownCodeblock(p))
+      return
    if roSupportSmilies in p.s.options:
      let n = parseSmiley(p)
      if n != nil:
@@ -1011,6 +1045,18 @@ proc tokenAfterNewline(p: RstParser): int =
      break
    else: inc result

+proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool =
+  var headlineLen = 0
+  if p.idx < adornmentIdx:  # underline
+    for i in p.idx ..< adornmentIdx-1:  # adornmentIdx-1 is a linebreak
+      headlineLen += p.tok[i].symbol.len
+  else:  # overline
+    var i = p.idx + 2
+    while p.tok[i].kind notin {tkEof, tkIndent}:
+      headlineLen += p.tok[i].symbol.len
+      inc i
+  return p.tok[adornmentIdx].symbol.len >= headlineLen
+
 proc isLineBlock(p: RstParser): bool =
  var j = tokenAfterNewline(p)
  result = currentTok(p).col == p.tok[j].col and p.tok[j].symbol == "|" or
@@ -1052,13 +1098,26 @@ proc findPipe(p: RstParser, start: int): bool =
    inc i

 proc whichSection(p: RstParser): RstNodeKind =
+  if currentTok(p).kind in {tkAdornment, tkPunct}:
+    # for punctuation sequences that can be both tkAdornment and tkPunct
+    if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```":
+      return rnCodeBlock
+    elif currentTok(p).symbol == "::":
+      return rnLiteralBlock
+    elif currentTok(p).symbol == ".." and predNL(p):
+     return rnDirective
  case currentTok(p).kind
  of tkAdornment:
-    if match(p, p.idx + 1, "ii"): result = rnTransition
+    if match(p, p.idx + 1, "ii") and currentTok(p).symbol.len >= 4:
+      result = rnTransition
+    elif match(p, p.idx, "+a+"):
+      result = rnGridTable
+      rstMessage(p, meGridTableNotImplemented)
    elif match(p, p.idx + 1, " a"): result = rnTable
-    elif match(p, p.idx + 1, "i"): result = rnOverline
-    elif isMarkdownHeadline(p):
-      result = rnHeadline
+    elif currentTok(p).symbol == "|" and isLineBlock(p):
+      result = rnLineBlock
+    elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx):
+      result = rnOverline
    else:
      result = rnLeaf
  of tkPunct:
@@ -1067,27 +1126,18 @@ proc whichSection(p: RstParser): RstNodeKind =
    elif roSupportMarkdown in p.s.options and predNL(p) and
        match(p, p.idx, "| w") and findPipe(p, p.idx+3):
      result = rnMarkdownTable
-    elif currentTok(p).symbol == "```":
-      result = rnCodeBlock
+    elif currentTok(p).symbol == "|" and isLineBlock(p):
+      result = rnLineBlock
    elif match(p, tokenAfterNewline(p), "ai"):
      result = rnHeadline
-    elif currentTok(p).symbol == "::":
-      result = rnLiteralBlock
    elif predNL(p) and
        currentTok(p).symbol in ["+", "*", "-"] and nextTok(p).kind == tkWhite:
      result = rnBulletList
-    elif currentTok(p).symbol == "|" and isLineBlock(p):
-      result = rnLineBlock
-    elif currentTok(p).symbol == ".." and predNL(p):
-      result = rnDirective
    elif match(p, p.idx, ":w:") and predNL(p):
      # (currentTok(p).symbol == ":")
      result = rnFieldList
    elif match(p, p.idx, "(e) ") or match(p, p.idx, "e. "):
      result = rnEnumList
-    elif match(p, p.idx, "+a+"):
-      result = rnGridTable
-      rstMessage(p, meGridTableNotImplemented)
    elif isDefList(p):
      result = rnDefList
    elif isOptionList(p):
@@ -1095,7 +1145,10 @@ proc whichSection(p: RstParser): RstNodeKind =
    else:
      result = rnParagraph
  of tkWord, tkOther, tkWhite:
-    if match(p, tokenAfterNewline(p), "ai"): result = rnHeadline
+    let tokIdx = tokenAfterNewline(p)
+    if match(p, tokIdx, "ai"):
+      if isAdornmentHeadline(p, tokIdx): result = rnHeadline
+      else: result = rnParagraph
    elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList
    elif isDefList(p): result = rnDefList
    else: result = rnParagraph
--- a/lib/packages/docutils/rstgen.nim
+++ b/lib/packages/docutils/rstgen.nim
@@ -1009,7 +1009,8 @@ proc renderContainer(d: PDoc, n: PRstNode, result: var string) =

 proc texColumns(n: PRstNode): string =
  result = ""
-  for i in countup(1, len(n)): add(result, "|X")
+  let nColumns = if n.sons.len > 0: len(n.sons[0]) else: 1
+  for i in countup(1, nColumns): add(result, "|X")

 proc renderField(d: PDoc, n: PRstNode, result: var string) =
  var b = false
--- a/nimdoc/rst2html/source/rst_examples.rst
+++ b/nimdoc/rst2html/source/rst_examples.rst
@@ -1,6 +1,6 @@
-==========
+================
 Not a Nim Manual
-==========
+================

 :Authors: Andreas Rumpf, Zahary Karadjov
 :Version: |nimversion|
--- a/tests/stdlib/trstgen.nim
+++ b/tests/stdlib/trstgen.nim
@@ -6,7 +6,7 @@ outputsub: ""

 import ../../lib/packages/docutils/rstgen
 import ../../lib/packages/docutils/rst
-import unittest, strtabs
+import unittest, strutils, strtabs

 suite "YAML syntax highlighting":
  test "Basics":
@@ -144,6 +144,12 @@ suite "YAML syntax highlighting":
  <span class="StringLit">?not a map key</span></pre>"""


+suite "RST/Markdown general":
+  test "RST emphasis":
+    assert rstToHtml("*Hello* **world**!", {},
+      newStringTable(modeStyleInsensitive)) ==
+      "<em>Hello</em> <strong>world</strong>!"
+
  test "Markdown links":
    let
      a = rstToHtml("(( [Nim](https://nim-lang.org/) ))", {roSupportMarkdown}, defaultConfig())
@@ -178,7 +184,179 @@ not in table"""
    assert output2 == """<table border="1" class="docutils"><tr><th>A1 header</th><th>A2</th></tr>
 </table>"""

+  test "RST tables":
+    let input1 = """
+Test 2 column/4 rows table:
+====   ===
+H0     H1 
+====   ===
+A0     A1 
+====   ===
+A2     A3 
+====   ===
+A4     A5 
+====   === """
+    let output1 = rstToLatex(input1, {})
+    assert "{|X|X|}" in output1  # 2 columns
+    assert count(output1, "\\\\") == 4  # 4 rows
+    for cell in ["H0", "H1", "A0", "A1", "A2", "A3", "A4", "A5"]:
+      assert cell in output1

-assert rstToHtml("*Hello* **world**!", {},
-  newStringTable(modeStyleInsensitive)) ==
-  "<em>Hello</em> <strong>world</strong>!"
+    let input2 = """
+Now test 3 columns / 2 rows, and also borders containing 4 =, 3 =, 1 = signs:
+
+====   ===  =
+H0     H1   H
+====   ===  =
+A0     A1   X
+       Ax   Y
+====   ===  = """
+    let output2 = rstToLatex(input2, {})
+    assert "{|X|X|X|}" in output2  # 3 columns
+    assert count(output2, "\\\\") == 2  # 2 rows
+    for cell in ["H0", "H1", "H", "A0", "A1", "X", "Ax", "Y"]:
+      assert cell in output2
+
+
+  test "RST adornments":
+    let input1 = """
+Check that a few punctuation symbols are not parsed as adornments:
+:word1: word2 .... word3 """
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    discard output1
+
+  test "RST sections":
+    let input1 = """
+Long chapter name
+'''''''''''''''''''
+"""
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert "Long chapter name" in output1 and "<h1" in output1
+
+    let input2 = """
+Short chapter name:
+
+ChA
+===
+"""
+    let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
+    assert "ChA" in output2 and "<h1" in output2
+
+    let input3 = """
+Very short chapter name:
+
+X
+~
+"""
+    let output3 = rstToHtml(input3, {roSupportMarkdown}, defaultConfig())
+    assert "X" in output3 and "<h1" in output3
+
+    let input4 = """
+Check that short underline is not enough to make section:
+
+Wrong chapter
+------------
+
+"""
+    let output4 = rstToHtml(input4, {roSupportMarkdown}, defaultConfig())
+    assert "Wrong chapter" in output4 and "<h1" notin output4
+
+    let input5 = """
+Check that punctuation after adornment and indent are not detected as adornment.
+
+Some chapter
+--------------
+
+  "punctuation symbols" """
+    let output5 = rstToHtml(input5, {roSupportMarkdown}, defaultConfig())
+    assert "&quot;punctuation symbols&quot;" in output5 and "<h1" in output5
+
+
+  test "RST links":
+    let input1 = """
+Want to learn about `my favorite programming language`_?
+
+.. _my favorite programming language: https://nim-lang.org"""
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert "<a" in output1 and "href=\"https://nim-lang.org\"" in output1
+
+  test "RST transitions":
+    let input1 = """
+context1
+
+~~~~
+
+context2
+"""
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert "<hr" in output1
+
+    let input2 = """
+This is too short to be a transition:
+
+---
+
+context2
+"""
+    let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
+    assert "<hr" notin output2
+
+  test "RST literal block":
+    let input1 = """
+Test literal block
+
+::
+
+  check """
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert "<pre>" in output1
+
+  test "Markdown code block":
+    let input1 = """
+```
+let x = 1
+``` """
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert "<pre" in output1 and "class=\"Keyword\"" notin output1
+
+    let input2 = """
+Parse the block with language specifier:
+```Nim
+let x = 1
+``` """
+    let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
+    assert "<pre" in output2 and "class=\"Keyword\"" in output2
+
+  test "RST comments":
+    let input1 = """
+Check that comment disappears:
+
+..
+  some comment """
+    let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
+    assert output1 == "Check that comment disappears:"
+
+  test "RST line blocks":
+    let input1 = """
+=====
+Test1
+=====
+
+|
+|
+| line block
+| other line
+
+"""
+    var option: bool
+    var rstGenera: RstGenerator
+    var output1: string
+    rstGenera.initRstGenerator(outHtml, defaultConfig(), "input", {})
+    rstGenera.renderRstToOut(rstParse(input1, "", 1, 1, option, {}), output1)
+    assert rstGenera.meta[metaTitle] == "Test1"
+      # check that title was not overwritten to '|'
+    assert "line block<br />" in output1
+    assert "other line<br />" in output1
+    let output1l = rstToLatex(input1, {})
+    assert "line block\\\\" in output1l
+    assert "other line\\\\" in output1l