add RST highlighting for command line / shells (also fixes #16858) (#17789)

This commit is contained in:
Andrey Makarov
2021-04-21 17:57:54 +03:00
committed by GitHub
parent 80389b8053
commit 8f79bc5f3d
10 changed files with 291 additions and 140 deletions

View File

@@ -37,6 +37,18 @@
## .. code:: Nim
## for l in ["C", "c++", "jAvA", "Nim", "c#"]: echo getSourceLanguage(l)
##
## There is also a `Cmd` pseudo-language supported, which is a simple generic
## shell/cmdline tokenizer (UNIX shell/Powershell/Windows Command):
## no escaping, no programming language constructs besides variable definition
## at the beginning of line. It supports these operators:
##
## .. code:: Cmd
## & && | || ( ) '' "" ; # for comments
##
## Instead of escaping, always use quotes; for example,
## `nimgrep --ext:'nim|nims' file.name`:cmd: shows how to input ``|``.
## Any argument that contains ``.`` or ``/`` or ``\`` will be treated
## as a file or directory.
import
strutils
@@ -45,7 +57,7 @@ from algorithm import binarySearch
type
SourceLanguage* = enum
langNone, langNim, langCpp, langCsharp, langC, langJava,
langYaml, langPython
langYaml, langPython, langCmd
TokenClass* = enum
gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber,
gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit,
@@ -53,7 +65,7 @@ type
gtOperator, gtPunctuation, gtComment, gtLongComment, gtRegularExpression,
gtTagStart, gtTagEnd, gtKey, gtValue, gtRawData, gtAssembler,
gtPreprocessor, gtDirective, gtCommand, gtRule, gtHyperlink, gtLabel,
gtReference, gtOther
gtReference, gtProgram, gtOption, gtOther
GeneralTokenizer* = object of RootObj
kind*: TokenClass
start*, length*: int
@@ -64,14 +76,17 @@ type
const
sourceLanguageToStr*: array[SourceLanguage, string] = ["none",
"Nim", "C++", "C#", "C", "Java", "Yaml", "Python"]
"Nim", "C++", "C#", "C", "Java", "Yaml", "Python", "Cmd"]
tokenClassToStr*: array[TokenClass, string] = ["Eof", "None", "Whitespace",
"DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber",
"Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit",
"EscapeSequence", "Operator", "Punctuation", "Comment", "LongComment",
"RegularExpression", "TagStart", "TagEnd", "Key", "Value", "RawData",
"Assembler", "Preprocessor", "Directive", "Command", "Rule", "Hyperlink",
"Label", "Reference", "Other"]
"Label", "Reference",
# start from lower-case if there is a corresponding RST role (see rst.nim)
"program", "option",
"Other"]
# The following list comes from doc/keywords.txt, make sure it is
# synchronized with this array by running the module itself as a test case.
@@ -898,6 +913,65 @@ proc pythonNextToken(g: var GeneralTokenizer) =
"with", "yield"]
nimNextToken(g, keywords)
proc cmdNextToken(g: var GeneralTokenizer) =
## Scans one token of the generic `Cmd` (shell / command line) pseudo-language
## from `g.buf`, starting at `g.pos`.
##
## On return `g.kind` holds the token class, `g.start` the token's first
## index, `g.length` its length, and `g.pos` points just past the token.
## `g.state` is used as a one-bit memory between calls: `gtProgram` means
## "the next plain word is a program name", `gtOption` means "the next plain
## word is an argument".
var pos = g.pos
g.start = g.pos
# `low(TokenClass)` is the first enum value; it is treated here as "state not
# set yet", so the very first word of the input is classified as a program.
if g.state == low(TokenClass):
g.state = gtProgram
case g.buf[pos]
of ' ', '\t'..'\r':
# A run of whitespace forms a single token.
g.kind = gtWhitespace
while g.buf[pos] in {' ', '\t'..'\r'}:
# A newline inside the run starts a new command line, so the next word is
# a program name again.
if g.buf[pos] == '\n':
g.state = gtProgram
inc(pos)
of '\'', '"':
# Quoted argument: everything up to the matching quote character (or the
# terminating '\0') is one `gtOption` token; no escape sequences are handled.
g.kind = gtOption
let q = g.buf[pos]
inc(pos)
while g.buf[pos] notin {q, '\0'}:
inc(pos)
# Consume the closing quote only if it is really there (unterminated input
# stops at '\0' instead).
if g.buf[pos] == q: inc(pos)
of '#':
# Comment: runs up to, but not including, the end of line or '\0'.
g.kind = gtComment
while g.buf[pos] notin {'\n', '\0'}:
inc(pos)
of '&', '|':
# `&`, `|` and their doubled forms `&&`, `||` are one operator token;
# the word that follows is a program name.
g.kind = gtOperator
inc(pos)
if g.buf[pos] == g.buf[pos-1]: inc(pos)
g.state = gtProgram
of '(':
# Opening parenthesis: operator, the word that follows is a program name.
g.kind = gtOperator
g.state = gtProgram
inc(pos)
of ')':
# Closing parenthesis: operator; `g.state` is left unchanged.
g.kind = gtOperator
inc(pos)
of ';':
# Command separator: operator, the word that follows is a program name.
g.state = gtProgram
g.kind = gtOperator
inc(pos)
# End of input: report `gtEof` without advancing (token length is 0).
of '\0': g.kind = gtEof
else:
# Plain word. Its initial class depends on `g.state`: the first word of a
# command is the program, all following words are options.
if g.state == gtProgram:
g.kind = gtProgram
g.state = gtOption
else:
g.kind = gtOption
# The word ends at whitespace, an operator character, a quote or '\0'.
while g.buf[pos] notin {' ', '\t'..'\r', '&', '|', '(', ')', '\'', '"', '\0'}:
# `;` ends the word only when it is followed by a space.
if g.buf[pos] == ';' and g.buf[pos+1] == ' ':
# (check space because ';' can be used inside arguments in Win bat)
break
# While scanning, the word may be reclassified based on its characters.
if g.kind == gtOption and g.buf[pos] in {'/', '\\', '.'}:
g.kind = gtIdentifier # for file/dir name
elif g.kind == gtProgram and g.buf[pos] == '=':
g.kind = gtIdentifier # for env variable setting at beginning of line
# After `NAME=value` the program name is still to come.
g.state = gtProgram
inc(pos)
# Publish the token extent and advance the tokenizer.
g.length = pos - g.pos
g.pos = pos
proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
g.lang = lang
case lang
@@ -909,6 +983,7 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
of langJava: javaNextToken(g)
of langYaml: yamlNextToken(g)
of langPython: pythonNextToken(g)
of langCmd: cmdNextToken(g)
when isMainModule:
var keywords: seq[string]

View File

@@ -23,10 +23,10 @@
##
## Nim can output the result to HTML [#html]_ or Latex [#latex]_.
##
## .. [#html] commands ``nim doc`` for ``*.nim`` files and
## ``nim rst2html`` for ``*.rst`` files
## .. [#html] commands `nim doc`:cmd: for ``*.nim`` files and
## `nim rst2html`:cmd: for ``*.rst`` files
##
## .. [#latex] command ``nim rst2tex`` for ``*.rst``.
## .. [#latex] command `nim rst2tex`:cmd: for ``*.rst``.
##
## If you are new to RST please consider reading the following:
##
@@ -78,14 +78,21 @@
##
## * directives: ``code-block`` [cmp:Sphinx]_, ``title``,
## ``index`` [cmp:Sphinx]_
## * predefined roles ``:nim:`` (default), ``:c:`` (C programming language),
## ``:python:``, ``:yaml:``, ``:java:``, ``:cpp:`` (C++), ``:csharp`` (C#).
## That is every language that `highlite <highlite.html>`_ supports.
## They turn on appropriate syntax highlighting in inline code.
## * predefined roles
## - ``:nim:`` (default), ``:c:`` (C programming language),
## ``:python:``, ``:yaml:``, ``:java:``, ``:cpp:`` (C++), ``:csharp:`` (C#).
## That is every language that `highlite <highlite.html>`_ supports.
## They turn on appropriate syntax highlighting in inline code.
##
## .. Note:: default role for Nim files is ``:nim:``,
## for ``*.rst`` it's currently ``:literal:``.
## .. Note:: default role for Nim files is ``:nim:``,
## for ``*.rst`` it's currently ``:literal:``.
##
## - generic command line highlighting roles:
## - ``:cmd:`` for commands and common shells syntax
## - ``:program:`` for executable names [cmp:Sphinx]_
## (one can just use ``:cmd:`` on single word)
## - ``:option:`` for command line options [cmp:Sphinx]_
## - ``:tok:``, a role for highlighting of programming language tokens
## * ***triple emphasis*** (bold and italic) using \*\*\*
## * ``:idx:`` role for \`interpreted text\` to include the link to this
## text into an index (example: `Nim index`_).
@@ -95,11 +102,11 @@
## //compile compile the project
## //doc generate documentation
##
## Here the dummy `//` will disappear, while options ``compile``
## and ``doc`` will be left in the final document.
## Here the dummy `//` will disappear, while options `compile`:option:
## and `doc`:option: will be left in the final document.
##
## .. [cmp:Sphinx] similar but different from the directives of
## Python `Sphinx directives`_ extensions
## Python `Sphinx directives`_ and `Sphinx roles`_ extensions
##
## .. _`extra features`:
##
@@ -144,7 +151,7 @@
## -----
##
## See `Nim DocGen Tools Guide <docgen.html>`_ for the details about
## ``nim doc``, ``nim rst2html`` and ``nim rst2tex`` commands.
## `nim doc`:cmd:, `nim rst2html`:cmd: and `nim rst2tex`:cmd: commands.
##
## See `packages/docutils/rstgen module <rstgen.html>`_ to know how to
## generate HTML or Latex strings to embed them into your documents.
@@ -156,6 +163,7 @@
## .. _RST roles list: https://docutils.sourceforge.io/docs/ref/rst/roles.html
## .. _Nim index: https://nim-lang.org/docs/theindex.html
## .. _Sphinx directives: https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html
## .. _Sphinx roles: https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html
import
os, strutils, rstast, std/enumutils, algorithm, lists, sequtils,
@@ -530,7 +538,7 @@ proc defaultRole(options: RstParseOptions): string =
# mirror highlite.nim sourceLanguageToStr with substitutions c++ cpp, c# csharp
const supportedLanguages = ["nim", "yaml", "python", "java", "c",
"cpp", "csharp"]
"cpp", "csharp", "cmd"]
proc whichRoleAux(sym: string): RstNodeKind =
let r = sym.toLowerAscii
@@ -543,6 +551,7 @@ proc whichRoleAux(sym: string): RstNodeKind =
of "sup", "superscript": result = rnSup
# literal and code are the same in our implementation
of "code": result = rnInlineLiteral
of "program", "option", "tok": result = rnCodeFragment
# c++ currently can be spelled only as cpp, c# only as csharp
elif r in supportedLanguages:
result = rnInlineCode
@@ -1113,10 +1122,10 @@ proc toInlineCode(n: PRstNode, language: string): PRstNode =
lb.add newLeaf(s)
result.add lb
proc toUnknownRole(n: PRstNode, roleName: string): PRstNode =
proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode =
let newN = newRstNode(rnInner, n.sons)
let newSons = @[newN, newLeaf(roleName)]
result = newRstNode(rnUnknownRole, newSons)
result = newRstNode(kind, newSons)
proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
var newKind = n.kind
@@ -1144,8 +1153,8 @@ proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
# a role:
let (roleName, lastIdx) = getRefname(p, p.idx+1)
newKind = whichRole(p, roleName)
if newKind == rnUnknownRole:
result = n.toUnknownRole(roleName)
if newKind in {rnUnknownRole, rnCodeFragment}:
result = n.toOtherRole(newKind, roleName)
elif newKind == rnInlineCode:
result = n.toInlineCode(language=roleName)
else:
@@ -1417,8 +1426,8 @@ proc parseInline(p: var RstParser, father: PRstNode) =
if k == rnInlineCode:
n = n.toInlineCode(language=roleName)
parseUntil(p, n, "`", false) # bug #17260
if k == rnUnknownRole:
n = n.toUnknownRole(roleName)
if k in {rnUnknownRole, rnCodeFragment}:
n = n.toOtherRole(k, roleName)
father.add(n)
elif isInlineMarkupStart(p, "`"):
var n = newRstNode(rnInterpretedText)

View File

@@ -56,7 +56,9 @@ type
# * `file#id <file#id>`_
rnSubstitutionDef, # a definition of a substitution
# Inline markup:
rnInlineCode,
rnInlineCode, # interpreted text with code in a known language
rnCodeFragment, # inline code for highlighting with the specified
# class (which cannot be inferred from context)
rnUnknownRole, # interpreted text with an unknown role
rnSub, rnSup, rnIdx,
rnEmphasis, # "*"

View File

@@ -1198,7 +1198,8 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
"$1", result)
of rnOptionGroup:
renderAux(d, n,
"<div class=\"option-list-label\">$1</div>",
"<div class=\"option-list-label\"><tt><span class=\"option\">" &
"$1</span></tt></div>",
"\\item[$1]", result)
of rnDescription:
renderAux(d, n, "<div class=\"option-list-description\">$1</div>",
@@ -1319,13 +1320,22 @@ proc renderRstToOut(d: PDoc, n: PRstNode, result: var string) =
renderAux(d, n, "|$1|", "|$1|", result)
of rnDirective:
renderAux(d, n, "", "", result)
of rnUnknownRole:
of rnUnknownRole, rnCodeFragment:
var tmp0 = ""
var tmp1 = ""
renderRstToOut(d, n.sons[0], tmp0)
renderRstToOut(d, n.sons[1], tmp1)
dispA(d.target, result, "<span class=\"$2\">$1</span>", "\\span$2{$1}",
[tmp0, tmp1])
var class = tmp1
# don't let a missing role break LaTeX compilation:
if d.target == outLatex and n.kind == rnUnknownRole: class = "Other"
if n.kind == rnCodeFragment:
dispA(d.target, result,
"<tt class=\"docutils literal\"><span class=\"pre $2\">" &
"$1</span></tt>",
"\\texttt{\\span$2{$1}}", [tmp0, class])
else: # rnUnknownRole, not necessarily code/monospace font
dispA(d.target, result, "<span class=\"$2\">$1</span>", "\\span$2{$1}",
[tmp0, class])
of rnSub: renderAux(d, n, "<sub>$1</sub>", "\\rstsub{$1}", result)
of rnSup: renderAux(d, n, "<sup>$1</sup>", "\\rstsup{$1}", result)
of rnEmphasis: renderAux(d, n, "<em>$1</em>", "\\emph{$1}", result)