RST tables: fix latex col number; allow less than three of = (#16040)

This commit is contained in:
Andrey Makarov
2020-12-04 10:50:17 +03:00
committed by GitHub
parent e4e5a0c65a
commit 6877e0c8a3
5 changed files with 263 additions and 31 deletions

View File

@@ -1,6 +1,6 @@
==============================================
==========================================================
Nim Enhancement Proposal #1 - Standard Library Style Guide
==============================================
==========================================================
:Author: Clay Sweetser, Dominik Picheta
:Version: |nimversion|

View File

@@ -101,7 +101,12 @@ const
type
TokType = enum
tkEof, tkIndent, tkWhite, tkWord, tkAdornment, tkPunct, tkOther
tkEof, tkIndent,
tkWhite, tkWord,
tkAdornment, # used for chapter adornment, transitions and
# horizontal table borders
tkPunct, # one or many punctuation characters
tkOther
Token = object # a RST token
kind*: TokType # the type of the token
ival*: int # the indentation or parsed integer value
@@ -114,6 +119,7 @@ type
bufpos*: int
line*, col*, baseIndent*: int
skipPounds*: bool
adornmentLine*: bool
proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
tok.kind = tkWord
@@ -127,8 +133,26 @@ proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
inc L.col, pos - L.bufpos
L.bufpos = pos
proc getAdornment(L: var Lexer, tok: var Token) =
tok.kind = tkAdornment
proc isCurrentLineAdornment(L: var Lexer): bool =
  ## Reports whether the remainder of the current line qualifies as an
  ## adornment line.
  ##
  ## The character at `L.bufpos` selects the rule applied to every following
  ## character up to (not including) the next `'\c'`, `'\l'` or `'\0'`:
  ##
  ## * leading `'+'` (grid table border): only `'-'`, `'='` and `'+'` are
  ##   accepted;
  ## * any other leading character (section adornment or table horizontal
  ##   border): only that same character and `' '`, `'\t'`, `'\v'`, `'\f'`
  ##   are accepted.
  ##
  ## Only a local position is advanced; no field of `L` is written here.
  let lead = L.buf[L.bufpos]
  let accepted =
    if lead == '+': {'-', '=', '+'}          # grid table
    else: {lead, ' ', '\t', '\v', '\f'}      # section adornment / table border
  # The leading character itself is never re-checked: scanning starts at the
  # position right after it.
  var i = L.bufpos + 1
  while L.buf[i] notin {'\c', '\l', '\0'}:
    if L.buf[i] notin accepted:
      return false
    inc i
  return true
proc getPunctAdornment(L: var Lexer, tok: var Token) =
if L.adornmentLine:
tok.kind = tkAdornment
else:
tok.kind = tkPunct
tok.line = L.line
tok.col = L.col
var pos = L.bufpos
@@ -139,6 +163,8 @@ proc getAdornment(L: var Lexer, tok: var Token) =
if L.buf[pos] != c: break
inc L.col, pos - L.bufpos
L.bufpos = pos
if tok.symbol == "\\": tok.kind = tkPunct
# Nim extension: a standalone \ cannot be an adornment
proc getBracket(L: var Lexer, tok: var Token) =
tok.kind = tkPunct
@@ -189,6 +215,8 @@ proc getIndent(L: var Lexer, tok: var Token) =
proc rawGetTok(L: var Lexer, tok: var Token) =
tok.symbol = ""
tok.ival = 0
if L.col == 0:
L.adornmentLine = false
var c = L.buf[L.bufpos]
case c
of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '0'..'9':
@@ -200,11 +228,13 @@ proc rawGetTok(L: var Lexer, tok: var Token) =
rawGetTok(L, tok) # ignore spaces before \n
of '\x0D', '\x0A':
getIndent(L, tok)
L.adornmentLine = false
of '!', '\"', '#', '$', '%', '&', '\'', '*', '+', ',', '-', '.',
'/', ':', ';', '<', '=', '>', '?', '@', '\\', '^', '_', '`',
'|', '~':
getAdornment(L, tok)
if tok.symbol.len <= 3: tok.kind = tkPunct
if L.col == 0:
L.adornmentLine = L.isCurrentLineAdornment()
getPunctAdornment(L, tok)
of '(', ')', '[', ']', '{', '}':
getBracket(L, tok)
else:
@@ -730,7 +760,7 @@ proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
of tkEof:
rstMessage(p, meExpected, "```")
break
of tkPunct:
of tkPunct, tkAdornment:
if currentTok(p).symbol == "```":
inc p.idx
break
@@ -822,6 +852,10 @@ proc parseInline(p: var RstParser, father: PRstNode) =
return
parseUrl(p, father)
of tkAdornment, tkOther, tkWhite:
if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```":
inc p.idx
father.add(parseMarkdownCodeblock(p))
return
if roSupportSmilies in p.s.options:
let n = parseSmiley(p)
if n != nil:
@@ -1011,6 +1045,18 @@ proc tokenAfterNewline(p: RstParser): int =
break
else: inc result
proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool =
  ## Returns true when the adornment token at `adornmentIdx` is at least as
  ## long as the headline text it belongs to.
  ##
  ## The headline length is the sum of `symbol.len` over the headline's
  ## tokens:
  ##
  ## * underline (`p.idx < adornmentIdx`): tokens from `p.idx` up to, but not
  ##   including, `adornmentIdx - 1`, which is the linebreak preceding the
  ##   adornment;
  ## * overline (otherwise): tokens from `p.idx + 2` until the first `tkEof`
  ##   or `tkIndent` token.
  var titleLen = 0
  if p.idx < adornmentIdx:
    # Underline: the headline text precedes the adornment.
    for k in countup(p.idx, adornmentIdx - 2):
      inc titleLen, p.tok[k].symbol.len
  else:
    # Overline: the headline text follows the adornment.
    # NOTE(review): the +2 offset presumably skips the adornment token and
    # the indent token after it -- confirm against the callers.
    var k = p.idx + 2
    while p.tok[k].kind notin {tkEof, tkIndent}:
      inc titleLen, p.tok[k].symbol.len
      inc k
  result = p.tok[adornmentIdx].symbol.len >= titleLen
proc isLineBlock(p: RstParser): bool =
var j = tokenAfterNewline(p)
result = currentTok(p).col == p.tok[j].col and p.tok[j].symbol == "|" or
@@ -1052,13 +1098,26 @@ proc findPipe(p: RstParser, start: int): bool =
inc i
proc whichSection(p: RstParser): RstNodeKind =
if currentTok(p).kind in {tkAdornment, tkPunct}:
# for punctuation sequences that can be both tkAdornment and tkPunct
if roSupportMarkdown in p.s.options and currentTok(p).symbol == "```":
return rnCodeBlock
elif currentTok(p).symbol == "::":
return rnLiteralBlock
elif currentTok(p).symbol == ".." and predNL(p):
return rnDirective
case currentTok(p).kind
of tkAdornment:
if match(p, p.idx + 1, "ii"): result = rnTransition
if match(p, p.idx + 1, "ii") and currentTok(p).symbol.len >= 4:
result = rnTransition
elif match(p, p.idx, "+a+"):
result = rnGridTable
rstMessage(p, meGridTableNotImplemented)
elif match(p, p.idx + 1, " a"): result = rnTable
elif match(p, p.idx + 1, "i"): result = rnOverline
elif isMarkdownHeadline(p):
result = rnHeadline
elif currentTok(p).symbol == "|" and isLineBlock(p):
result = rnLineBlock
elif match(p, p.idx + 1, "i") and isAdornmentHeadline(p, p.idx):
result = rnOverline
else:
result = rnLeaf
of tkPunct:
@@ -1067,27 +1126,18 @@ proc whichSection(p: RstParser): RstNodeKind =
elif roSupportMarkdown in p.s.options and predNL(p) and
match(p, p.idx, "| w") and findPipe(p, p.idx+3):
result = rnMarkdownTable
elif currentTok(p).symbol == "```":
result = rnCodeBlock
elif currentTok(p).symbol == "|" and isLineBlock(p):
result = rnLineBlock
elif match(p, tokenAfterNewline(p), "ai"):
result = rnHeadline
elif currentTok(p).symbol == "::":
result = rnLiteralBlock
elif predNL(p) and
currentTok(p).symbol in ["+", "*", "-"] and nextTok(p).kind == tkWhite:
result = rnBulletList
elif currentTok(p).symbol == "|" and isLineBlock(p):
result = rnLineBlock
elif currentTok(p).symbol == ".." and predNL(p):
result = rnDirective
elif match(p, p.idx, ":w:") and predNL(p):
# (currentTok(p).symbol == ":")
result = rnFieldList
elif match(p, p.idx, "(e) ") or match(p, p.idx, "e. "):
result = rnEnumList
elif match(p, p.idx, "+a+"):
result = rnGridTable
rstMessage(p, meGridTableNotImplemented)
elif isDefList(p):
result = rnDefList
elif isOptionList(p):
@@ -1095,7 +1145,10 @@ proc whichSection(p: RstParser): RstNodeKind =
else:
result = rnParagraph
of tkWord, tkOther, tkWhite:
if match(p, tokenAfterNewline(p), "ai"): result = rnHeadline
let tokIdx = tokenAfterNewline(p)
if match(p, tokIdx, "ai"):
if isAdornmentHeadline(p, tokIdx): result = rnHeadline
else: result = rnParagraph
elif match(p, p.idx, "e) ") or match(p, p.idx, "e. "): result = rnEnumList
elif isDefList(p): result = rnDefList
else: result = rnParagraph

View File

@@ -1009,7 +1009,8 @@ proc renderContainer(d: PDoc, n: PRstNode, result: var string) =
proc texColumns(n: PRstNode): string =
result = ""
for i in countup(1, len(n)): add(result, "|X")
let nColumns = if n.sons.len > 0: len(n.sons[0]) else: 1
for i in countup(1, nColumns): add(result, "|X")
proc renderField(d: PDoc, n: PRstNode, result: var string) =
var b = false

View File

@@ -1,6 +1,6 @@
==========
================
Not a Nim Manual
==========
================
:Authors: Andreas Rumpf, Zahary Karadjov
:Version: |nimversion|

View File

@@ -6,7 +6,7 @@ outputsub: ""
import ../../lib/packages/docutils/rstgen
import ../../lib/packages/docutils/rst
import unittest, strtabs
import unittest, strutils, strtabs
suite "YAML syntax highlighting":
test "Basics":
@@ -144,6 +144,12 @@ suite "YAML syntax highlighting":
<span class="StringLit">?not a map key</span></pre>"""
suite "RST/Markdown general":
test "RST emphasis":
assert rstToHtml("*Hello* **world**!", {},
newStringTable(modeStyleInsensitive)) ==
"<em>Hello</em> <strong>world</strong>!"
test "Markdown links":
let
a = rstToHtml("(( [Nim](https://nim-lang.org/) ))", {roSupportMarkdown}, defaultConfig())
@@ -178,7 +184,179 @@ not in table"""
assert output2 == """<table border="1" class="docutils"><tr><th>A1 header</th><th>A2</th></tr>
</table>"""
test "RST tables":
let input1 = """
Test 2 column/4 rows table:
==== ===
H0 H1
==== ===
A0 A1
==== ===
A2 A3
==== ===
A4 A5
==== === """
let output1 = rstToLatex(input1, {})
assert "{|X|X|}" in output1 # 2 columns
assert count(output1, "\\\\") == 4 # 4 rows
for cell in ["H0", "H1", "A0", "A1", "A2", "A3", "A4", "A5"]:
assert cell in output1
assert rstToHtml("*Hello* **world**!", {},
newStringTable(modeStyleInsensitive)) ==
"<em>Hello</em> <strong>world</strong>!"
let input2 = """
Now test 3 columns / 2 rows, and also borders containing 4 =, 3 =, 1 = signs:
==== === =
H0 H1 H
==== === =
A0 A1 X
Ax Y
==== === = """
let output2 = rstToLatex(input2, {})
assert "{|X|X|X|}" in output2 # 3 columns
assert count(output2, "\\\\") == 2 # 2 rows
for cell in ["H0", "H1", "H", "A0", "A1", "X", "Ax", "Y"]:
assert cell in output2
test "RST adornments":
let input1 = """
Check that a few punctuation symbols are not parsed as adornments:
:word1: word2 .... word3 """
let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
discard output1
test "RST sections":
let input1 = """
Long chapter name
'''''''''''''''''''
"""
let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
assert "Long chapter name" in output1 and "<h1" in output1
let input2 = """
Short chapter name:
ChA
===
"""
let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
assert "ChA" in output2 and "<h1" in output2
let input3 = """
Very short chapter name:
X
~
"""
let output3 = rstToHtml(input3, {roSupportMarkdown}, defaultConfig())
assert "X" in output3 and "<h1" in output3
let input4 = """
Check that short underline is not enough to make section:
Wrong chapter
------------
"""
let output4 = rstToHtml(input4, {roSupportMarkdown}, defaultConfig())
assert "Wrong chapter" in output4 and "<h1" notin output4
let input5 = """
Check that punctuation after adornment and indent are not detected as adornment.
Some chapter
--------------
"punctuation symbols" """
let output5 = rstToHtml(input5, {roSupportMarkdown}, defaultConfig())
assert "&quot;punctuation symbols&quot;" in output5 and "<h1" in output5
test "RST links":
let input1 = """
Want to learn about `my favorite programming language`_?
.. _my favorite programming language: https://nim-lang.org"""
let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
assert "<a" in output1 and "href=\"https://nim-lang.org\"" in output1
test "RST transitions":
let input1 = """
context1
~~~~
context2
"""
let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
assert "<hr" in output1
let input2 = """
This is too short to be a transition:
---
context2
"""
let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
assert "<hr" notin output2
test "RST literal block":
let input1 = """
Test literal block
::
check """
let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
assert "<pre>" in output1
test "Markdown code block":
let input1 = """
```
let x = 1
``` """
let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
assert "<pre" in output1 and "class=\"Keyword\"" notin output1
let input2 = """
Parse the block with language specifier:
```Nim
let x = 1
``` """
let output2 = rstToHtml(input2, {roSupportMarkdown}, defaultConfig())
assert "<pre" in output2 and "class=\"Keyword\"" in output2
test "RST comments":
let input1 = """
Check that comment disappears:
..
some comment """
let output1 = rstToHtml(input1, {roSupportMarkdown}, defaultConfig())
assert output1 == "Check that comment disappears:"
test "RST line blocks":
let input1 = """
=====
Test1
=====
|
|
| line block
| other line
"""
var option: bool
var rstGenera: RstGenerator
var output1: string
rstGenera.initRstGenerator(outHtml, defaultConfig(), "input", {})
rstGenera.renderRstToOut(rstParse(input1, "", 1, 1, option, {}), output1)
assert rstGenera.meta[metaTitle] == "Test1"
# check that title was not overwritten to '|'
assert "line block<br />" in output1
assert "other line<br />" in output1
let output1l = rstToLatex(input1, {})
assert "line block\\\\" in output1l
assert "other line\\\\" in output1l