mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-28 17:04:41 +00:00
Added Python to highlite module. (#16286)
* Added Python to the highlite module.
* Added the keywords for Python.
* Refactored 'pythonLikeNextToken' into 'nimNextToken':
  - A `lang` property was added to the GeneralTokenizer object; it is set in `getNextToken`.
  - `nimNextToken` accepts a `keywords` parameter, used for languages other than Nim.
  - Multiline comments are available only for `langNim`.
This commit is contained in:
@@ -45,6 +45,9 @@ import
|
||||
from algorithm import binarySearch
|
||||
|
||||
type
|
||||
SourceLanguage* = enum
|
||||
langNone, langNim, langCpp, langCsharp, langC, langJava,
|
||||
langYaml, langPython
|
||||
TokenClass* = enum
|
||||
gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber,
|
||||
gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit,
|
||||
@@ -59,14 +62,11 @@ type
|
||||
buf: cstring
|
||||
pos: int
|
||||
state: TokenClass
|
||||
|
||||
SourceLanguage* = enum
|
||||
langNone, langNim, langCpp, langCsharp, langC, langJava,
|
||||
langYaml
|
||||
lang: SourceLanguage
|
||||
|
||||
const
|
||||
sourceLanguageToStr*: array[SourceLanguage, string] = ["none",
|
||||
"Nim", "C++", "C#", "C", "Java", "Yaml"]
|
||||
"Nim", "C++", "C#", "C", "Java", "Yaml", "Python"]
|
||||
tokenClassToStr*: array[TokenClass, string] = ["Eof", "None", "Whitespace",
|
||||
"DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber",
|
||||
"Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit",
|
||||
@@ -101,6 +101,7 @@ proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: cstring) =
|
||||
g.start = 0
|
||||
g.length = 0
|
||||
g.state = low(TokenClass)
|
||||
g.lang = low(SourceLanguage)
|
||||
var pos = 0 # skip initial whitespace:
|
||||
while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
|
||||
g.pos = pos
|
||||
@@ -161,7 +162,13 @@ const
|
||||
OpChars = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
|
||||
'|', '=', '%', '&', '$', '@', '~', ':'}
|
||||
|
||||
proc nimNextToken(g: var GeneralTokenizer) =
|
||||
proc isKeyword(x: openArray[string], y: string): int =
  ## Looks up `y` in the keyword table `x` with a case-sensitive
  ## `binarySearch` and returns that call's result unchanged.
  ## `nimNextToken` treats a result `>= 0` as "is a keyword".
  ## Assumes `x` is sorted in ascending order, as binary search
  ## requires -- TODO confirm for every table passed in.
|
||||
binarySearch(x, y)
|
||||
|
||||
proc isKeywordIgnoreCase(x: openArray[string], y: string): int =
  ## Same lookup as a plain `binarySearch(x, y)`, but elements are
  ## compared with `cmpIgnoreCase`, so letter case is not significant.
  ## Returns the `binarySearch` result unchanged.
  ## Assumes `x` is sorted consistently with `cmpIgnoreCase` -- TODO confirm
  ## at the call sites.
|
||||
binarySearch(x, y, cmpIgnoreCase)
|
||||
|
||||
proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
|
||||
const
|
||||
hexChars = {'0'..'9', 'A'..'F', 'a'..'f', '_'}
|
||||
octChars = {'0'..'7', '_'}
|
||||
@@ -207,7 +214,7 @@ proc nimNextToken(g: var GeneralTokenizer) =
|
||||
if g.buf[pos] == '#':
|
||||
inc(pos)
|
||||
isDoc = true
|
||||
if g.buf[pos] == '[':
|
||||
if g.buf[pos] == '[' and g.lang == langNim:
|
||||
g.kind = gtLongComment
|
||||
var nesting = 0
|
||||
while true:
|
||||
@@ -265,7 +272,10 @@ proc nimNextToken(g: var GeneralTokenizer) =
|
||||
inc(pos)
|
||||
if g.buf[pos] == '\"': inc(pos)
|
||||
else:
|
||||
g.kind = nimGetKeyword(id)
|
||||
if g.lang == langNim:
|
||||
g.kind = nimGetKeyword(id)
|
||||
elif isKeyword(keywords, id) >= 0:
|
||||
g.kind = gtKeyword
|
||||
of '0':
|
||||
inc(pos)
|
||||
case g.buf[pos]
|
||||
@@ -394,12 +404,6 @@ proc generalStrLit(g: var GeneralTokenizer, position: int): int =
|
||||
inc(pos)
|
||||
result = pos
|
||||
|
||||
proc isKeyword(x: openArray[string], y: string): int =
  ## Looks up `y` in the keyword table `x` with a case-sensitive
  ## `binarySearch` and returns that call's result unchanged.
  ## Assumes `x` is sorted in ascending order, as binary search
  ## requires -- TODO confirm for every table passed in.
|
||||
binarySearch(x, y)
|
||||
|
||||
proc isKeywordIgnoreCase(x: openArray[string], y: string): int =
  ## Binary-searches `x` for `y`, comparing elements with `cmpIgnoreCase`
  ## so that letter case is not significant. Returns the `binarySearch`
  ## result unchanged.
  ## Assumes `x` is sorted consistently with `cmpIgnoreCase` -- TODO confirm
  ## at the call sites.
|
||||
binarySearch(x, y, cmpIgnoreCase)
|
||||
|
||||
type
|
||||
TokenizerFlag = enum
|
||||
hasPreprocessor, hasNestedComments
|
||||
@@ -886,7 +890,18 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
||||
g.length = pos - g.pos
|
||||
g.pos = pos
|
||||
|
||||
proc pythonNextToken(g: var GeneralTokenizer) =
  ## Produces the next token for Python source by delegating to
  ## `nimNextToken`, passing a fixed table of 35 Python keywords.
|
||||
const
|
||||
# The entries are listed in ascending byte order (capitalised names
# first). Keep that order when editing: the table is looked up through
# `isKeyword`, which uses `binarySearch`.
keywords: array[0..34, string] = [
|
||||
"False", "None", "True", "and", "as", "assert", "async", "await",
|
||||
"break", "class", "continue", "def", "del", "elif", "else", "except",
|
||||
"finally", "for", "from", "global", "if", "import", "in", "is", "lambda",
|
||||
"nonlocal", "not", "or", "pass", "raise", "return", "try", "while",
|
||||
"with", "yield"]
|
||||
# All lexing is shared with the Nim tokenizer; only the keyword table
# differs.
nimNextToken(g, keywords)
|
||||
|
||||
proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
|
||||
g.lang = lang
|
||||
case lang
|
||||
of langNone: assert false
|
||||
of langNim: nimNextToken(g)
|
||||
@@ -895,6 +910,7 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
|
||||
of langC: cNextToken(g)
|
||||
of langJava: javaNextToken(g)
|
||||
of langYaml: yamlNextToken(g)
|
||||
of langPython: pythonNextToken(g)
|
||||
|
||||
when isMainModule:
|
||||
var keywords: seq[string]
|
||||
|
||||
@@ -188,13 +188,13 @@ not in table"""
|
||||
let input1 = """
|
||||
Test 2 column/4 rows table:
|
||||
==== ===
|
||||
H0 H1
|
||||
H0 H1
|
||||
==== ===
|
||||
A0 A1
|
||||
A0 A1
|
||||
==== ===
|
||||
A2 A3
|
||||
A2 A3
|
||||
==== ===
|
||||
A4 A5
|
||||
A4 A5
|
||||
==== === """
|
||||
let output1 = rstToLatex(input1, {})
|
||||
assert "{|X|X|}" in output1 # 2 columns
|
||||
@@ -360,3 +360,19 @@ Test1
|
||||
let output1l = rstToLatex(input1, {})
|
||||
assert "line block\\\\" in output1l
|
||||
assert "other line\\\\" in output1l
|
||||
|
||||
# Checks that an RST `code-block:: python` directive is rendered to HTML
# with Python-aware highlighting (e.g. `def` wrapped in a Keyword span).
suite "RST/Code highlight":
|
||||
test "Basic Python code highlight":
|
||||
let pythonCode = """
|
||||
.. code-block:: python
|
||||
|
||||
def f_name(arg=42):
|
||||
print(f"{arg}")
|
||||
|
||||
"""
|
||||
|
||||
# NOTE(review): the expected HTML below classes `arg` as Punctuation;
# confirm this is the intended tokenizer output rather than an accident.
let expected = """<blockquote><p><span class="Keyword">def</span> f_name<span class="Punctuation">(</span><span class="Punctuation">arg</span><span class="Operator">=</span><span class="DecNumber">42</span><span class="Punctuation">)</span><span class="Punctuation">:</span>
|
||||
print<span class="Punctuation">(</span><span class="RawData">f"{arg}"</span><span class="Punctuation">)</span></p></blockquote>"""
|
||||
|
||||
# Both sides are passed through `strip` before comparison, so leading and
# trailing whitespace differences do not fail the check.
check strip(rstToHtml(pythonCode, {}, newStringTable(modeCaseSensitive))) ==
|
||||
strip(expected)
|
||||
|
||||
Reference in New Issue
Block a user