|
|
|
|
@@ -103,7 +103,7 @@ proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: cstring) =
|
|
|
|
|
g.state = low(TokenClass)
|
|
|
|
|
g.lang = low(SourceLanguage)
|
|
|
|
|
var pos = 0 # skip initial whitespace:
|
|
|
|
|
while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
|
|
|
|
|
while g.buf[pos] in {' ', '\t'..'\r'}: inc(pos)
|
|
|
|
|
g.pos = pos
|
|
|
|
|
|
|
|
|
|
proc initGeneralTokenizer*(g: var GeneralTokenizer, buf: string) =
|
|
|
|
|
@@ -194,7 +194,7 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
|
|
|
|
|
g.state = gtNone
|
|
|
|
|
else: inc(pos)
|
|
|
|
|
break
|
|
|
|
|
of '\0', '\x0D', '\x0A':
|
|
|
|
|
of '\0', '\r', '\n':
|
|
|
|
|
g.state = gtNone
|
|
|
|
|
break
|
|
|
|
|
of '\"':
|
|
|
|
|
@@ -204,9 +204,9 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
|
|
|
|
|
else: inc(pos)
|
|
|
|
|
else:
|
|
|
|
|
case g.buf[pos]
|
|
|
|
|
of ' ', '\x09'..'\x0D':
|
|
|
|
|
of ' ', '\t'..'\r':
|
|
|
|
|
g.kind = gtWhitespace
|
|
|
|
|
while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
|
|
|
|
|
while g.buf[pos] in {' ', '\t'..'\r'}: inc(pos)
|
|
|
|
|
of '#':
|
|
|
|
|
g.kind = gtComment
|
|
|
|
|
inc(pos)
|
|
|
|
|
@@ -243,7 +243,7 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
|
|
|
|
|
else:
|
|
|
|
|
inc pos
|
|
|
|
|
else:
|
|
|
|
|
while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
|
|
|
|
|
while g.buf[pos] notin {'\0', '\n', '\r'}: inc(pos)
|
|
|
|
|
of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF':
|
|
|
|
|
var id = ""
|
|
|
|
|
while g.buf[pos] in SymChars + {'_'}:
|
|
|
|
|
@@ -267,7 +267,7 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
|
|
|
|
|
else:
|
|
|
|
|
g.kind = gtRawData
|
|
|
|
|
inc(pos)
|
|
|
|
|
while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}):
|
|
|
|
|
while not (g.buf[pos] in {'\0', '\n', '\r'}):
|
|
|
|
|
if g.buf[pos] == '"' and g.buf[pos+1] != '"': break
|
|
|
|
|
inc(pos)
|
|
|
|
|
if g.buf[pos] == '\"': inc(pos)
|
|
|
|
|
@@ -302,7 +302,7 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
|
|
|
|
|
g.kind = gtCharLit
|
|
|
|
|
while true:
|
|
|
|
|
case g.buf[pos]
|
|
|
|
|
of '\0', '\x0D', '\x0A':
|
|
|
|
|
of '\0', '\r', '\n':
|
|
|
|
|
break
|
|
|
|
|
of '\'':
|
|
|
|
|
inc(pos)
|
|
|
|
|
@@ -330,7 +330,7 @@ proc nimNextToken(g: var GeneralTokenizer, keywords: openArray[string] = @[]) =
|
|
|
|
|
g.kind = gtStringLit
|
|
|
|
|
while true:
|
|
|
|
|
case g.buf[pos]
|
|
|
|
|
of '\0', '\x0D', '\x0A':
|
|
|
|
|
of '\0', '\r', '\n':
|
|
|
|
|
break
|
|
|
|
|
of '\"':
|
|
|
|
|
inc(pos)
|
|
|
|
|
@@ -436,7 +436,7 @@ proc clikeNextToken(g: var GeneralTokenizer, keywords: openArray[string],
|
|
|
|
|
g.state = gtNone
|
|
|
|
|
else: inc(pos)
|
|
|
|
|
break
|
|
|
|
|
of '\0', '\x0D', '\x0A':
|
|
|
|
|
of '\0', '\r', '\n':
|
|
|
|
|
g.state = gtNone
|
|
|
|
|
break
|
|
|
|
|
of '\"':
|
|
|
|
|
@@ -446,14 +446,14 @@ proc clikeNextToken(g: var GeneralTokenizer, keywords: openArray[string],
|
|
|
|
|
else: inc(pos)
|
|
|
|
|
else:
|
|
|
|
|
case g.buf[pos]
|
|
|
|
|
of ' ', '\x09'..'\x0D':
|
|
|
|
|
of ' ', '\t'..'\r':
|
|
|
|
|
g.kind = gtWhitespace
|
|
|
|
|
while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
|
|
|
|
|
while g.buf[pos] in {' ', '\t'..'\r'}: inc(pos)
|
|
|
|
|
of '/':
|
|
|
|
|
inc(pos)
|
|
|
|
|
if g.buf[pos] == '/':
|
|
|
|
|
g.kind = gtComment
|
|
|
|
|
while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): inc(pos)
|
|
|
|
|
while not (g.buf[pos] in {'\0', '\n', '\r'}): inc(pos)
|
|
|
|
|
elif g.buf[pos] == '*':
|
|
|
|
|
g.kind = gtLongComment
|
|
|
|
|
var nested = 0
|
|
|
|
|
@@ -593,9 +593,9 @@ proc javaNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
|
|
|
|
|
proc yamlPlainStrLit(g: var GeneralTokenizer, pos: var int) =
|
|
|
|
|
g.kind = gtStringLit
|
|
|
|
|
while g.buf[pos] notin {'\0', '\x09'..'\x0D', ',', ']', '}'}:
|
|
|
|
|
while g.buf[pos] notin {'\0', '\t'..'\r', ',', ']', '}'}:
|
|
|
|
|
if g.buf[pos] == ':' and
|
|
|
|
|
g.buf[pos + 1] in {'\0', '\x09'..'\x0D', ' '}:
|
|
|
|
|
g.buf[pos + 1] in {'\0', '\t'..'\r', ' '}:
|
|
|
|
|
break
|
|
|
|
|
inc(pos)
|
|
|
|
|
|
|
|
|
|
@@ -608,14 +608,14 @@ proc yamlPossibleNumber(g: var GeneralTokenizer, pos: var int) =
|
|
|
|
|
while g.buf[pos] in {'0'..'9'}: inc(pos)
|
|
|
|
|
else: yamlPlainStrLit(g, pos)
|
|
|
|
|
if g.kind == gtNone:
|
|
|
|
|
if g.buf[pos] in {'\0', '\x09'..'\x0D', ' ', ',', ']', '}'}:
|
|
|
|
|
if g.buf[pos] in {'\0', '\t'..'\r', ' ', ',', ']', '}'}:
|
|
|
|
|
g.kind = gtDecNumber
|
|
|
|
|
elif g.buf[pos] == '.':
|
|
|
|
|
inc(pos)
|
|
|
|
|
if g.buf[pos] notin {'0'..'9'}: yamlPlainStrLit(g, pos)
|
|
|
|
|
else:
|
|
|
|
|
while g.buf[pos] in {'0'..'9'}: inc(pos)
|
|
|
|
|
if g.buf[pos] in {'\0', '\x09'..'\x0D', ' ', ',', ']', '}'}:
|
|
|
|
|
if g.buf[pos] in {'\0', '\t'..'\r', ' ', ',', ']', '}'}:
|
|
|
|
|
g.kind = gtFloatNumber
|
|
|
|
|
if g.kind == gtNone:
|
|
|
|
|
if g.buf[pos] in {'e', 'E'}:
|
|
|
|
|
@@ -624,13 +624,13 @@ proc yamlPossibleNumber(g: var GeneralTokenizer, pos: var int) =
|
|
|
|
|
if g.buf[pos] notin {'0'..'9'}: yamlPlainStrLit(g, pos)
|
|
|
|
|
else:
|
|
|
|
|
while g.buf[pos] in {'0'..'9'}: inc(pos)
|
|
|
|
|
if g.buf[pos] in {'\0', '\x09'..'\x0D', ' ', ',', ']', '}'}:
|
|
|
|
|
if g.buf[pos] in {'\0', '\t'..'\r', ' ', ',', ']', '}'}:
|
|
|
|
|
g.kind = gtFloatNumber
|
|
|
|
|
else: yamlPlainStrLit(g, pos)
|
|
|
|
|
else: yamlPlainStrLit(g, pos)
|
|
|
|
|
while g.buf[pos] notin {'\0', ',', ']', '}', '\x0A', '\x0D'}:
|
|
|
|
|
while g.buf[pos] notin {'\0', ',', ']', '}', '\n', '\r'}:
|
|
|
|
|
inc(pos)
|
|
|
|
|
if g.buf[pos] notin {'\x09'..'\x0D', ' ', ',', ']', '}'}:
|
|
|
|
|
if g.buf[pos] notin {'\t'..'\r', ' ', ',', ']', '}'}:
|
|
|
|
|
yamlPlainStrLit(g, pos)
|
|
|
|
|
break
|
|
|
|
|
# theoretically, we would need to parse indentation (like with block scalars)
|
|
|
|
|
@@ -699,13 +699,13 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
while g.buf[pos] in {' ', '\t'}: inc(pos)
|
|
|
|
|
of '#':
|
|
|
|
|
g.kind = gtComment
|
|
|
|
|
while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
|
|
|
|
|
of '\x0A', '\x0D': discard
|
|
|
|
|
while g.buf[pos] notin {'\0', '\n', '\r'}: inc(pos)
|
|
|
|
|
of '\n', '\r': discard
|
|
|
|
|
else:
|
|
|
|
|
# illegal here. just don't parse a block scalar
|
|
|
|
|
g.kind = gtNone
|
|
|
|
|
g.state = gtOther
|
|
|
|
|
if g.buf[pos] in {'\x0A', '\x0D'} and g.state == gtCommand:
|
|
|
|
|
if g.buf[pos] in {'\n', '\r'} and g.state == gtCommand:
|
|
|
|
|
g.state = gtLongStringLit
|
|
|
|
|
elif g.state == gtLongStringLit:
|
|
|
|
|
# beware, this is the only token where we actually have to parse
|
|
|
|
|
@@ -714,10 +714,10 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
g.kind = gtLongStringLit
|
|
|
|
|
# first, we have to find the parent indentation of the block scalar, so that
|
|
|
|
|
# we know when to stop
|
|
|
|
|
assert g.buf[pos] in {'\x0A', '\x0D'}
|
|
|
|
|
assert g.buf[pos] in {'\n', '\r'}
|
|
|
|
|
var lookbehind = pos - 1
|
|
|
|
|
var headerStart = -1
|
|
|
|
|
while lookbehind >= 0 and g.buf[lookbehind] notin {'\x0A', '\x0D'}:
|
|
|
|
|
while lookbehind >= 0 and g.buf[lookbehind] notin {'\n', '\r'}:
|
|
|
|
|
if headerStart == -1 and g.buf[lookbehind] in {'|', '>'}:
|
|
|
|
|
headerStart = lookbehind
|
|
|
|
|
dec(lookbehind)
|
|
|
|
|
@@ -728,12 +728,12 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
# when the header is alone in a line, this line does not show the parent's
|
|
|
|
|
# indentation, so we must go further. search the first previous line with
|
|
|
|
|
# non-whitespace content.
|
|
|
|
|
while lookbehind >= 0 and g.buf[lookbehind] in {'\x0A', '\x0D'}:
|
|
|
|
|
while lookbehind >= 0 and g.buf[lookbehind] in {'\n', '\r'}:
|
|
|
|
|
dec(lookbehind)
|
|
|
|
|
while lookbehind >= 0 and
|
|
|
|
|
g.buf[lookbehind] in {' ', '\t'}: dec(lookbehind)
|
|
|
|
|
# now, find the beginning of the line...
|
|
|
|
|
while lookbehind >= 0 and g.buf[lookbehind] notin {'\x0A', '\x0D'}:
|
|
|
|
|
while lookbehind >= 0 and g.buf[lookbehind] notin {'\n', '\r'}:
|
|
|
|
|
dec(lookbehind)
|
|
|
|
|
# ... and its indentation
|
|
|
|
|
indentation = 1
|
|
|
|
|
@@ -741,7 +741,7 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
if lookbehind == -1: indentation = 0 # top level
|
|
|
|
|
elif g.buf[lookbehind + 1] == '-' and g.buf[lookbehind + 2] == '-' and
|
|
|
|
|
g.buf[lookbehind + 3] == '-' and
|
|
|
|
|
g.buf[lookbehind + 4] in {'\x09'..'\x0D', ' '}:
|
|
|
|
|
g.buf[lookbehind + 4] in {'\t'..'\r', ' '}:
|
|
|
|
|
# this is a document start, therefore, we are at top level
|
|
|
|
|
indentation = 0
|
|
|
|
|
# because lookbehind was at newline char when calculating indentation, we're
|
|
|
|
|
@@ -749,7 +749,7 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
let parentIndentation = indentation - 1
|
|
|
|
|
|
|
|
|
|
# find first content
|
|
|
|
|
while g.buf[pos] in {' ', '\x0A', '\x0D'}:
|
|
|
|
|
while g.buf[pos] in {' ', '\n', '\r'}:
|
|
|
|
|
if g.buf[pos] == ' ': inc(indentation)
|
|
|
|
|
else: indentation = 0
|
|
|
|
|
inc(pos)
|
|
|
|
|
@@ -766,12 +766,12 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
if (indentation < minIndentation and g.buf[pos] == '#') or
|
|
|
|
|
(indentation == 0 and g.buf[pos] == '.' and g.buf[pos + 1] == '.' and
|
|
|
|
|
g.buf[pos + 2] == '.' and
|
|
|
|
|
g.buf[pos + 3] in {'\0', '\x09'..'\x0D', ' '}):
|
|
|
|
|
g.buf[pos + 3] in {'\0', '\t'..'\r', ' '}):
|
|
|
|
|
# comment after end of block scalar, or end of document
|
|
|
|
|
break
|
|
|
|
|
minIndentation = min(indentation, minIndentation)
|
|
|
|
|
while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
|
|
|
|
|
while g.buf[pos] in {' ', '\x0A', '\x0D'}:
|
|
|
|
|
while g.buf[pos] notin {'\0', '\n', '\r'}: inc(pos)
|
|
|
|
|
while g.buf[pos] in {' ', '\n', '\r'}:
|
|
|
|
|
if g.buf[pos] == ' ': inc(indentation)
|
|
|
|
|
else: indentation = 0
|
|
|
|
|
inc(pos)
|
|
|
|
|
@@ -780,27 +780,27 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
elif g.state == gtOther:
|
|
|
|
|
# gtOther means 'inside YAML document'
|
|
|
|
|
case g.buf[pos]
|
|
|
|
|
of ' ', '\x09'..'\x0D':
|
|
|
|
|
of ' ', '\t'..'\r':
|
|
|
|
|
g.kind = gtWhitespace
|
|
|
|
|
while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
|
|
|
|
|
while g.buf[pos] in {' ', '\t'..'\r'}: inc(pos)
|
|
|
|
|
of '#':
|
|
|
|
|
g.kind = gtComment
|
|
|
|
|
inc(pos)
|
|
|
|
|
while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
|
|
|
|
|
while g.buf[pos] notin {'\0', '\n', '\r'}: inc(pos)
|
|
|
|
|
of '-':
|
|
|
|
|
inc(pos)
|
|
|
|
|
if g.buf[pos] in {'\0', ' ', '\x09'..'\x0D'}:
|
|
|
|
|
if g.buf[pos] in {'\0', ' ', '\t'..'\r'}:
|
|
|
|
|
g.kind = gtPunctuation
|
|
|
|
|
elif g.buf[pos] == '-' and
|
|
|
|
|
(pos == 1 or g.buf[pos - 2] in {'\x0A', '\x0D'}): # start of line
|
|
|
|
|
(pos == 1 or g.buf[pos - 2] in {'\n', '\r'}): # start of line
|
|
|
|
|
inc(pos)
|
|
|
|
|
if g.buf[pos] == '-' and g.buf[pos + 1] in {'\0', '\x09'..'\x0D', ' '}:
|
|
|
|
|
if g.buf[pos] == '-' and g.buf[pos + 1] in {'\0', '\t'..'\r', ' '}:
|
|
|
|
|
inc(pos)
|
|
|
|
|
g.kind = gtKeyword
|
|
|
|
|
else: yamlPossibleNumber(g, pos)
|
|
|
|
|
else: yamlPossibleNumber(g, pos)
|
|
|
|
|
of '.':
|
|
|
|
|
if pos == 0 or g.buf[pos - 1] in {'\x0A', '\x0D'}:
|
|
|
|
|
if pos == 0 or g.buf[pos - 1] in {'\n', '\r'}:
|
|
|
|
|
inc(pos)
|
|
|
|
|
for i in 1..2:
|
|
|
|
|
if g.buf[pos] != '.': break
|
|
|
|
|
@@ -812,12 +812,12 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
else: yamlPlainStrLit(g, pos)
|
|
|
|
|
of '?':
|
|
|
|
|
inc(pos)
|
|
|
|
|
if g.buf[pos] in {'\0', ' ', '\x09'..'\x0D'}:
|
|
|
|
|
if g.buf[pos] in {'\0', ' ', '\t'..'\r'}:
|
|
|
|
|
g.kind = gtPunctuation
|
|
|
|
|
else: yamlPlainStrLit(g, pos)
|
|
|
|
|
of ':':
|
|
|
|
|
inc(pos)
|
|
|
|
|
if g.buf[pos] in {'\0', '\x09'..'\x0D', ' ', '\'', '\"'} or
|
|
|
|
|
if g.buf[pos] in {'\0', '\t'..'\r', ' ', '\'', '\"'} or
|
|
|
|
|
(pos > 0 and g.buf[pos - 2] in {'}', ']', '\"', '\''}):
|
|
|
|
|
g.kind = gtPunctuation
|
|
|
|
|
else: yamlPlainStrLit(g, pos)
|
|
|
|
|
@@ -836,7 +836,7 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
inc(pos)
|
|
|
|
|
if g.buf[pos] == '<':
|
|
|
|
|
# literal tag (e.g. `!<tag:yaml.org,2002:str>`)
|
|
|
|
|
while g.buf[pos] notin {'\0', '>', '\x09'..'\x0D', ' '}: inc(pos)
|
|
|
|
|
while g.buf[pos] notin {'\0', '>', '\t'..'\r', ' '}: inc(pos)
|
|
|
|
|
if g.buf[pos] == '>': inc(pos)
|
|
|
|
|
else:
|
|
|
|
|
while g.buf[pos] in {'A'..'Z', 'a'..'z', '0'..'9', '-'}: inc(pos)
|
|
|
|
|
@@ -845,17 +845,17 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
# prefixed tag (e.g. `!!str`)
|
|
|
|
|
inc(pos)
|
|
|
|
|
while g.buf[pos] notin
|
|
|
|
|
{'\0', '\x09'..'\x0D', ' ', ',', '[', ']', '{', '}'}: inc(pos)
|
|
|
|
|
of '\0', '\x09'..'\x0D', ' ': discard
|
|
|
|
|
{'\0', '\t'..'\r', ' ', ',', '[', ']', '{', '}'}: inc(pos)
|
|
|
|
|
of '\0', '\t'..'\r', ' ': discard
|
|
|
|
|
else:
|
|
|
|
|
# local tag (e.g. `!nim:system:int`)
|
|
|
|
|
while g.buf[pos] notin {'\0', '\x09'..'\x0D', ' '}: inc(pos)
|
|
|
|
|
while g.buf[pos] notin {'\0', '\t'..'\r', ' '}: inc(pos)
|
|
|
|
|
of '&':
|
|
|
|
|
g.kind = gtLabel
|
|
|
|
|
while g.buf[pos] notin {'\0', '\x09'..'\x0D', ' '}: inc(pos)
|
|
|
|
|
while g.buf[pos] notin {'\0', '\t'..'\r', ' '}: inc(pos)
|
|
|
|
|
of '*':
|
|
|
|
|
g.kind = gtReference
|
|
|
|
|
while g.buf[pos] notin {'\0', '\x09'..'\x0D', ' '}: inc(pos)
|
|
|
|
|
while g.buf[pos] notin {'\0', '\t'..'\r', ' '}: inc(pos)
|
|
|
|
|
of '|', '>':
|
|
|
|
|
# this can lead to incorrect tokenization when | or > appear inside flow
|
|
|
|
|
# content. checking whether we're inside flow content is not
|
|
|
|
|
@@ -871,18 +871,18 @@ proc yamlNextToken(g: var GeneralTokenizer) =
|
|
|
|
|
# outside document
|
|
|
|
|
case g.buf[pos]
|
|
|
|
|
of '%':
|
|
|
|
|
if pos == 0 or g.buf[pos - 1] in {'\x0A', '\x0D'}:
|
|
|
|
|
if pos == 0 or g.buf[pos - 1] in {'\n', '\r'}:
|
|
|
|
|
g.kind = gtDirective
|
|
|
|
|
while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
|
|
|
|
|
while g.buf[pos] notin {'\0', '\n', '\r'}: inc(pos)
|
|
|
|
|
else:
|
|
|
|
|
g.state = gtOther
|
|
|
|
|
yamlPlainStrLit(g, pos)
|
|
|
|
|
of ' ', '\x09'..'\x0D':
|
|
|
|
|
of ' ', '\t'..'\r':
|
|
|
|
|
g.kind = gtWhitespace
|
|
|
|
|
while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
|
|
|
|
|
while g.buf[pos] in {' ', '\t'..'\r'}: inc(pos)
|
|
|
|
|
of '#':
|
|
|
|
|
g.kind = gtComment
|
|
|
|
|
while g.buf[pos] notin {'\0', '\x0A', '\x0D'}: inc(pos)
|
|
|
|
|
while g.buf[pos] notin {'\0', '\n', '\r'}: inc(pos)
|
|
|
|
|
of '\0': g.kind = gtEof
|
|
|
|
|
else:
|
|
|
|
|
g.kind = gtNone
|
|
|
|
|
|