highlite: fix #17890 - tokenize Nim escape sequences (#17919)

* highlite: fix #17890 - tokenize Nim escape sequences

* Update tests/stdlib/thighlite.nim

Co-authored-by: Timothee Cour <timothee.cour2@gmail.com>

Co-authored-by: Timothee Cour <timothee.cour2@gmail.com>
This commit is contained in:
Andrey Makarov
2021-05-03 11:21:36 +03:00
committed by GitHub
parent 0dc534832e
commit 287f1170ba
2 changed files with 48 additions and 21 deletions

View File

@@ -190,31 +190,33 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
var pos = g.pos
g.start = g.pos
if g.state == gtStringLit:
g.kind = gtStringLit
while true:
if g.buf[pos] == '\\':
g.kind = gtEscapeSequence
inc(pos)
case g.buf[pos]
of '\\':
g.kind = gtEscapeSequence
of 'x', 'X':
inc(pos)
case g.buf[pos]
of 'x', 'X':
inc(pos)
if g.buf[pos] in hexChars: inc(pos)
if g.buf[pos] in hexChars: inc(pos)
of '0'..'9':
while g.buf[pos] in {'0'..'9'}: inc(pos)
of '\0':
g.state = gtNone
else: inc(pos)
break
of '\0', '\r', '\n':
if g.buf[pos] in hexChars: inc(pos)
if g.buf[pos] in hexChars: inc(pos)
of '0'..'9':
while g.buf[pos] in {'0'..'9'}: inc(pos)
of '\0':
g.state = gtNone
break
of '\"':
inc(pos)
g.state = gtNone
break
else: inc(pos)
else:
g.kind = gtStringLit
while true:
case g.buf[pos]
of '\\':
break
of '\0', '\r', '\n':
g.state = gtNone
break
of '\"':
inc(pos)
g.state = gtNone
break
else: inc(pos)
else:
case g.buf[pos]
of ' ', '\t'..'\r':
@@ -985,6 +987,18 @@ proc getNextToken*(g: var GeneralTokenizer, lang: SourceLanguage) =
of langPython: pythonNextToken(g)
of langCmd: cmdNextToken(g)
proc tokenize*(text: string, lang: SourceLanguage): seq[(string, TokenClass)] =
  ## Runs the tokenizer for `lang` over `text` and collects every token as a
  ## `(substring, kind)` pair, in source order.
  ##
  ## Each substring is the slice of `text` between the position where the
  ## previous token ended (0 for the first one) and the tokenizer's position
  ## after the current token. Scanning ends at the first `gtEof` token, which
  ## is not included in the result.
  var tokenizer: GeneralTokenizer
  initGeneralTokenizer(tokenizer, text)
  var consumed = 0
  # Prime the loop with the first token so the EOF test sits in the condition.
  getNextToken(tokenizer, lang)
  while tokenizer.kind != gtEof:
    result.add (text[consumed ..< tokenizer.pos], tokenizer.kind)
    consumed = tokenizer.pos
    getNextToken(tokenizer, lang)
when isMainModule:
var keywords: seq[string]
# Try to work running in both the subdir or at the root.

View File

@@ -0,0 +1,13 @@
import unittest
import ../../lib/packages/docutils/highlite
block: # Nim tokenizing
  test "string literals and escape seq":
    # A regular string literal is split around each escape sequence.
    let expectedPlain = @[
      ("\"ok1", gtStringLit), ("\\n", gtEscapeSequence),
      ("ok2", gtStringLit), ("\\n", gtEscapeSequence),
      ("ok3\"", gtStringLit)
    ]
    check(tokenize("\"ok1\\nok2\\nok3\"", langNim) == expectedPlain)

    # A triple-quoted literal is expected to come back as one single token.
    let expectedLong = @[
      ("\"\"\"ok1\\nok2\\nok3\"\"\"", gtLongStringLit)
    ]
    check(tokenize("\"\"\"ok1\\nok2\\nok3\"\"\"", langNim) == expectedLong)