mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-22 15:25:22 +00:00
move parsejson logic into its own standard lib module
This commit is contained in:
@@ -89,506 +89,21 @@
|
||||
## echo j2
|
||||
|
||||
import
|
||||
hashes, tables, strutils, lexbase, streams, unicode, macros
|
||||
hashes, tables, strutils, lexbase, streams, unicode, macros, parsejson
|
||||
|
||||
export
|
||||
tables.`$`
|
||||
|
||||
export
|
||||
parsejson.JsonEventKind, parsejson.JsonError, JsonParser, JsonKindError,
|
||||
open, close, str, getInt, getFloat, kind, getColumn, getLine, getFilename,
|
||||
errorMsg, errorMsgExpected, next, JsonParsingError, raiseParseErr
|
||||
|
||||
when defined(nimJsonGet):
|
||||
{.pragma: deprecatedGet, deprecated.}
|
||||
else:
|
||||
{.pragma: deprecatedGet.}
|
||||
|
||||
type
|
||||
JsonEventKind* = enum ## enumeration of all events that may occur when parsing
|
||||
jsonError, ## an error occurred during parsing
|
||||
jsonEof, ## end of file reached
|
||||
jsonString, ## a string literal
|
||||
jsonInt, ## an integer literal
|
||||
jsonFloat, ## a float literal
|
||||
jsonTrue, ## the value ``true``
|
||||
jsonFalse, ## the value ``false``
|
||||
jsonNull, ## the value ``null``
|
||||
jsonObjectStart, ## start of an object: the ``{`` token
|
||||
jsonObjectEnd, ## end of an object: the ``}`` token
|
||||
jsonArrayStart, ## start of an array: the ``[`` token
|
||||
jsonArrayEnd ## start of an array: the ``]`` token
|
||||
|
||||
TokKind = enum # must be synchronized with TJsonEventKind!
|
||||
tkError,
|
||||
tkEof,
|
||||
tkString,
|
||||
tkInt,
|
||||
tkFloat,
|
||||
tkTrue,
|
||||
tkFalse,
|
||||
tkNull,
|
||||
tkCurlyLe,
|
||||
tkCurlyRi,
|
||||
tkBracketLe,
|
||||
tkBracketRi,
|
||||
tkColon,
|
||||
tkComma
|
||||
|
||||
JsonError* = enum ## enumeration that lists all errors that can occur
|
||||
errNone, ## no error
|
||||
errInvalidToken, ## invalid token
|
||||
errStringExpected, ## string expected
|
||||
errColonExpected, ## ``:`` expected
|
||||
errCommaExpected, ## ``,`` expected
|
||||
errBracketRiExpected, ## ``]`` expected
|
||||
errCurlyRiExpected, ## ``}`` expected
|
||||
errQuoteExpected, ## ``"`` or ``'`` expected
|
||||
errEOC_Expected, ## ``*/`` expected
|
||||
errEofExpected, ## EOF expected
|
||||
errExprExpected ## expr expected
|
||||
|
||||
ParserState = enum
|
||||
stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma,
|
||||
stateExpectObjectComma, stateExpectColon, stateExpectValue
|
||||
|
||||
JsonParser* = object of BaseLexer ## the parser object.
|
||||
a: string
|
||||
tok: TokKind
|
||||
kind: JsonEventKind
|
||||
err: JsonError
|
||||
state: seq[ParserState]
|
||||
filename: string
|
||||
|
||||
JsonKindError* = object of ValueError ## raised by the ``to`` macro if the
|
||||
## JSON kind is incorrect.
|
||||
|
||||
const
|
||||
errorMessages: array[JsonError, string] = [
|
||||
"no error",
|
||||
"invalid token",
|
||||
"string expected",
|
||||
"':' expected",
|
||||
"',' expected",
|
||||
"']' expected",
|
||||
"'}' expected",
|
||||
"'\"' or \"'\" expected",
|
||||
"'*/' expected",
|
||||
"EOF expected",
|
||||
"expression expected"
|
||||
]
|
||||
tokToStr: array[TokKind, string] = [
|
||||
"invalid token",
|
||||
"EOF",
|
||||
"string literal",
|
||||
"int literal",
|
||||
"float literal",
|
||||
"true",
|
||||
"false",
|
||||
"null",
|
||||
"{", "}", "[", "]", ":", ","
|
||||
]
|
||||
|
||||
proc open*(my: var JsonParser, input: Stream, filename: string) =
|
||||
## initializes the parser with an input stream. `Filename` is only used
|
||||
## for nice error messages.
|
||||
lexbase.open(my, input)
|
||||
my.filename = filename
|
||||
my.state = @[stateStart]
|
||||
my.kind = jsonError
|
||||
my.a = ""
|
||||
|
||||
proc close*(my: var JsonParser) {.inline.} =
|
||||
## closes the parser `my` and its associated input stream.
|
||||
lexbase.close(my)
|
||||
|
||||
proc str*(my: JsonParser): string {.inline.} =
|
||||
## returns the character data for the events: ``jsonInt``, ``jsonFloat``,
|
||||
## ``jsonString``
|
||||
assert(my.kind in {jsonInt, jsonFloat, jsonString})
|
||||
return my.a
|
||||
|
||||
proc getInt*(my: JsonParser): BiggestInt {.inline.} =
|
||||
## returns the number for the event: ``jsonInt``
|
||||
assert(my.kind == jsonInt)
|
||||
return parseBiggestInt(my.a)
|
||||
|
||||
proc getFloat*(my: JsonParser): float {.inline.} =
|
||||
## returns the number for the event: ``jsonFloat``
|
||||
assert(my.kind == jsonFloat)
|
||||
return parseFloat(my.a)
|
||||
|
||||
proc kind*(my: JsonParser): JsonEventKind {.inline.} =
|
||||
## returns the current event type for the JSON parser
|
||||
return my.kind
|
||||
|
||||
proc getColumn*(my: JsonParser): int {.inline.} =
|
||||
## get the current column the parser has arrived at.
|
||||
result = getColNumber(my, my.bufpos)
|
||||
|
||||
proc getLine*(my: JsonParser): int {.inline.} =
|
||||
## get the current line the parser has arrived at.
|
||||
result = my.lineNumber
|
||||
|
||||
proc getFilename*(my: JsonParser): string {.inline.} =
|
||||
## get the filename of the file that the parser processes.
|
||||
result = my.filename
|
||||
|
||||
proc errorMsg*(my: JsonParser): string =
|
||||
## returns a helpful error message for the event ``jsonError``
|
||||
assert(my.kind == jsonError)
|
||||
result = "$1($2, $3) Error: $4" % [
|
||||
my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]]
|
||||
|
||||
proc errorMsgExpected*(my: JsonParser, e: string): string =
|
||||
## returns an error message "`e` expected" in the same format as the
|
||||
## other error messages
|
||||
result = "$1($2, $3) Error: $4" % [
|
||||
my.filename, $getLine(my), $getColumn(my), e & " expected"]
|
||||
|
||||
proc handleHexChar(c: char, x: var int): bool =
|
||||
result = true # Success
|
||||
case c
|
||||
of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
|
||||
of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
|
||||
of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
|
||||
else: result = false # error
|
||||
|
||||
proc parseEscapedUTF16(buf: cstring, pos: var int): int =
|
||||
result = 0
|
||||
#UTF-16 escape is always 4 bytes.
|
||||
for _ in 0..3:
|
||||
if handleHexChar(buf[pos], result):
|
||||
inc(pos)
|
||||
else:
|
||||
return -1
|
||||
|
||||
proc parseString(my: var JsonParser): TokKind =
|
||||
result = tkString
|
||||
var pos = my.bufpos + 1
|
||||
var buf = my.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
of '\0':
|
||||
my.err = errQuoteExpected
|
||||
result = tkError
|
||||
break
|
||||
of '"':
|
||||
inc(pos)
|
||||
break
|
||||
of '\\':
|
||||
case buf[pos+1]
|
||||
of '\\', '"', '\'', '/':
|
||||
add(my.a, buf[pos+1])
|
||||
inc(pos, 2)
|
||||
of 'b':
|
||||
add(my.a, '\b')
|
||||
inc(pos, 2)
|
||||
of 'f':
|
||||
add(my.a, '\f')
|
||||
inc(pos, 2)
|
||||
of 'n':
|
||||
add(my.a, '\L')
|
||||
inc(pos, 2)
|
||||
of 'r':
|
||||
add(my.a, '\C')
|
||||
inc(pos, 2)
|
||||
of 't':
|
||||
add(my.a, '\t')
|
||||
inc(pos, 2)
|
||||
of 'u':
|
||||
inc(pos, 2)
|
||||
var r = parseEscapedUTF16(buf, pos)
|
||||
if r < 0:
|
||||
my.err = errInvalidToken
|
||||
break
|
||||
# Deal with surrogates
|
||||
if (r and 0xfc00) == 0xd800:
|
||||
if buf[pos] & buf[pos+1] != "\\u":
|
||||
my.err = errInvalidToken
|
||||
break
|
||||
inc(pos, 2)
|
||||
var s = parseEscapedUTF16(buf, pos)
|
||||
if (s and 0xfc00) == 0xdc00 and s > 0:
|
||||
r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00))
|
||||
else:
|
||||
my.err = errInvalidToken
|
||||
break
|
||||
add(my.a, toUTF8(Rune(r)))
|
||||
else:
|
||||
# don't bother with the error
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\c')
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\L')
|
||||
else:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos # store back
|
||||
|
||||
proc skip(my: var JsonParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
of '/':
|
||||
if buf[pos+1] == '/':
|
||||
# skip line comment:
|
||||
inc(pos, 2)
|
||||
while true:
|
||||
case buf[pos]
|
||||
of '\0':
|
||||
break
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
break
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
break
|
||||
else:
|
||||
inc(pos)
|
||||
elif buf[pos+1] == '*':
|
||||
# skip long comment:
|
||||
inc(pos, 2)
|
||||
while true:
|
||||
case buf[pos]
|
||||
of '\0':
|
||||
my.err = errEOC_Expected
|
||||
break
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
of '*':
|
||||
inc(pos)
|
||||
if buf[pos] == '/':
|
||||
inc(pos)
|
||||
break
|
||||
else:
|
||||
inc(pos)
|
||||
else:
|
||||
break
|
||||
of ' ', '\t':
|
||||
inc(pos)
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
else:
|
||||
break
|
||||
my.bufpos = pos
|
||||
|
||||
proc parseNumber(my: var JsonParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if buf[pos] == '-':
|
||||
add(my.a, '-')
|
||||
inc(pos)
|
||||
if buf[pos] == '.':
|
||||
add(my.a, "0.")
|
||||
inc(pos)
|
||||
else:
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] == '.':
|
||||
add(my.a, '.')
|
||||
inc(pos)
|
||||
# digits after the dot:
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] in {'E', 'e'}:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] in {'+', '-'}:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
|
||||
proc parseName(my: var JsonParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if buf[pos] in IdentStartChars:
|
||||
while buf[pos] in IdentChars:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
|
||||
proc getTok(my: var JsonParser): TokKind =
|
||||
setLen(my.a, 0)
|
||||
skip(my) # skip whitespace, comments
|
||||
case my.buf[my.bufpos]
|
||||
of '-', '.', '0'..'9':
|
||||
parseNumber(my)
|
||||
if {'.', 'e', 'E'} in my.a:
|
||||
result = tkFloat
|
||||
else:
|
||||
result = tkInt
|
||||
of '"':
|
||||
result = parseString(my)
|
||||
of '[':
|
||||
inc(my.bufpos)
|
||||
result = tkBracketLe
|
||||
of '{':
|
||||
inc(my.bufpos)
|
||||
result = tkCurlyLe
|
||||
of ']':
|
||||
inc(my.bufpos)
|
||||
result = tkBracketRi
|
||||
of '}':
|
||||
inc(my.bufpos)
|
||||
result = tkCurlyRi
|
||||
of ',':
|
||||
inc(my.bufpos)
|
||||
result = tkComma
|
||||
of ':':
|
||||
inc(my.bufpos)
|
||||
result = tkColon
|
||||
of '\0':
|
||||
result = tkEof
|
||||
of 'a'..'z', 'A'..'Z', '_':
|
||||
parseName(my)
|
||||
case my.a
|
||||
of "null": result = tkNull
|
||||
of "true": result = tkTrue
|
||||
of "false": result = tkFalse
|
||||
else: result = tkError
|
||||
else:
|
||||
inc(my.bufpos)
|
||||
result = tkError
|
||||
my.tok = result
|
||||
|
||||
proc next*(my: var JsonParser) =
|
||||
## retrieves the first/next event. This controls the parser.
|
||||
var tk = getTok(my)
|
||||
var i = my.state.len-1
|
||||
# the following code is a state machine. If we had proper coroutines,
|
||||
# the code could be much simpler.
|
||||
case my.state[i]
|
||||
of stateEof:
|
||||
if tk == tkEof:
|
||||
my.kind = jsonEof
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errEofExpected
|
||||
of stateStart:
|
||||
# tokens allowed?
|
||||
case tk
|
||||
of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull:
|
||||
my.state[i] = stateEof # expect EOF next!
|
||||
my.kind = JsonEventKind(ord(tk))
|
||||
of tkBracketLe:
|
||||
my.state.add(stateArray) # we expect any
|
||||
my.kind = jsonArrayStart
|
||||
of tkCurlyLe:
|
||||
my.state.add(stateObject)
|
||||
my.kind = jsonObjectStart
|
||||
of tkEof:
|
||||
my.kind = jsonEof
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errEofExpected
|
||||
of stateObject:
|
||||
case tk
|
||||
of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull:
|
||||
my.state.add(stateExpectColon)
|
||||
my.kind = JsonEventKind(ord(tk))
|
||||
of tkBracketLe:
|
||||
my.state.add(stateExpectColon)
|
||||
my.state.add(stateArray)
|
||||
my.kind = jsonArrayStart
|
||||
of tkCurlyLe:
|
||||
my.state.add(stateExpectColon)
|
||||
my.state.add(stateObject)
|
||||
my.kind = jsonObjectStart
|
||||
of tkCurlyRi:
|
||||
my.kind = jsonObjectEnd
|
||||
discard my.state.pop()
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errCurlyRiExpected
|
||||
of stateArray:
|
||||
case tk
|
||||
of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull:
|
||||
my.state.add(stateExpectArrayComma) # expect value next!
|
||||
my.kind = JsonEventKind(ord(tk))
|
||||
of tkBracketLe:
|
||||
my.state.add(stateExpectArrayComma)
|
||||
my.state.add(stateArray)
|
||||
my.kind = jsonArrayStart
|
||||
of tkCurlyLe:
|
||||
my.state.add(stateExpectArrayComma)
|
||||
my.state.add(stateObject)
|
||||
my.kind = jsonObjectStart
|
||||
of tkBracketRi:
|
||||
my.kind = jsonArrayEnd
|
||||
discard my.state.pop()
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errBracketRiExpected
|
||||
of stateExpectArrayComma:
|
||||
case tk
|
||||
of tkComma:
|
||||
discard my.state.pop()
|
||||
next(my)
|
||||
of tkBracketRi:
|
||||
my.kind = jsonArrayEnd
|
||||
discard my.state.pop() # pop stateExpectArrayComma
|
||||
discard my.state.pop() # pop stateArray
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errBracketRiExpected
|
||||
of stateExpectObjectComma:
|
||||
case tk
|
||||
of tkComma:
|
||||
discard my.state.pop()
|
||||
next(my)
|
||||
of tkCurlyRi:
|
||||
my.kind = jsonObjectEnd
|
||||
discard my.state.pop() # pop stateExpectObjectComma
|
||||
discard my.state.pop() # pop stateObject
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errCurlyRiExpected
|
||||
of stateExpectColon:
|
||||
case tk
|
||||
of tkColon:
|
||||
my.state[i] = stateExpectValue
|
||||
next(my)
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errColonExpected
|
||||
of stateExpectValue:
|
||||
case tk
|
||||
of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull:
|
||||
my.state[i] = stateExpectObjectComma
|
||||
my.kind = JsonEventKind(ord(tk))
|
||||
of tkBracketLe:
|
||||
my.state[i] = stateExpectObjectComma
|
||||
my.state.add(stateArray)
|
||||
my.kind = jsonArrayStart
|
||||
of tkCurlyLe:
|
||||
my.state[i] = stateExpectObjectComma
|
||||
my.state.add(stateObject)
|
||||
my.kind = jsonObjectStart
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errExprExpected
|
||||
|
||||
|
||||
# ------------- higher level interface ---------------------------------------
|
||||
|
||||
type
|
||||
JsonNodeKind* = enum ## possible JSON node types
|
||||
JNull,
|
||||
@@ -617,12 +132,6 @@ type
|
||||
of JArray:
|
||||
elems*: seq[JsonNode]
|
||||
|
||||
JsonParsingError* = object of ValueError ## is raised for a JSON error
|
||||
|
||||
proc raiseParseErr*(p: JsonParser, msg: string) {.noinline, noreturn.} =
|
||||
## raises an `EJsonParsingError` exception.
|
||||
raise newException(JsonParsingError, errorMsgExpected(p, msg))
|
||||
|
||||
proc newJString*(s: string): JsonNode =
|
||||
## Creates a new `JString JsonNode`.
|
||||
new(result)
|
||||
@@ -1194,10 +703,6 @@ iterator mpairs*(node: var JsonNode): tuple[key: string, val: var JsonNode] =
|
||||
for key, val in mpairs(node.fields):
|
||||
yield (key, val)
|
||||
|
||||
proc eat(p: var JsonParser, tok: TokKind) =
|
||||
if p.tok == tok: discard getTok(p)
|
||||
else: raiseParseErr(p, tokToStr[tok])
|
||||
|
||||
proc parseJson(p: var JsonParser): JsonNode =
|
||||
## Parses JSON from a JSON Parser `p`.
|
||||
case p.tok
|
||||
@@ -1253,10 +758,12 @@ when not defined(js):
|
||||
## If `s` contains extra data, it will raise `JsonParsingError`.
|
||||
var p: JsonParser
|
||||
p.open(s, filename)
|
||||
defer: p.close()
|
||||
discard getTok(p) # read first token
|
||||
result = p.parseJson()
|
||||
eat(p, tkEof) # check if there is no extra data
|
||||
try:
|
||||
discard getTok(p) # read first token
|
||||
result = p.parseJson()
|
||||
eat(p, tkEof) # check if there is no extra data
|
||||
finally:
|
||||
p.close()
|
||||
|
||||
proc parseJson*(buffer: string): JsonNode =
|
||||
## Parses JSON from `buffer`.
|
||||
|
||||
535
lib/pure/parsejson.nim
Normal file
535
lib/pure/parsejson.nim
Normal file
@@ -0,0 +1,535 @@
|
||||
#
|
||||
#
|
||||
# Nim's Runtime Library
|
||||
# (c) Copyright 2018 Nim contributors
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## This module implements a json parser. It is used
|
||||
## and exported by the ``json`` standard library
|
||||
## module, but can also be used in its own right.
|
||||
|
||||
import
|
||||
strutils, lexbase, streams, unicode
|
||||
|
||||
type
|
||||
JsonEventKind* = enum ## enumeration of all events that may occur when parsing
|
||||
jsonError, ## an error occurred during parsing
|
||||
jsonEof, ## end of file reached
|
||||
jsonString, ## a string literal
|
||||
jsonInt, ## an integer literal
|
||||
jsonFloat, ## a float literal
|
||||
jsonTrue, ## the value ``true``
|
||||
jsonFalse, ## the value ``false``
|
||||
jsonNull, ## the value ``null``
|
||||
jsonObjectStart, ## start of an object: the ``{`` token
|
||||
jsonObjectEnd, ## end of an object: the ``}`` token
|
||||
jsonArrayStart, ## start of an array: the ``[`` token
|
||||
jsonArrayEnd ## start of an array: the ``]`` token
|
||||
|
||||
TokKind* = enum # must be synchronized with TJsonEventKind!
|
||||
tkError,
|
||||
tkEof,
|
||||
tkString,
|
||||
tkInt,
|
||||
tkFloat,
|
||||
tkTrue,
|
||||
tkFalse,
|
||||
tkNull,
|
||||
tkCurlyLe,
|
||||
tkCurlyRi,
|
||||
tkBracketLe,
|
||||
tkBracketRi,
|
||||
tkColon,
|
||||
tkComma
|
||||
|
||||
JsonError* = enum ## enumeration that lists all errors that can occur
|
||||
errNone, ## no error
|
||||
errInvalidToken, ## invalid token
|
||||
errStringExpected, ## string expected
|
||||
errColonExpected, ## ``:`` expected
|
||||
errCommaExpected, ## ``,`` expected
|
||||
errBracketRiExpected, ## ``]`` expected
|
||||
errCurlyRiExpected, ## ``}`` expected
|
||||
errQuoteExpected, ## ``"`` or ``'`` expected
|
||||
errEOC_Expected, ## ``*/`` expected
|
||||
errEofExpected, ## EOF expected
|
||||
errExprExpected ## expr expected
|
||||
|
||||
ParserState = enum
|
||||
stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma,
|
||||
stateExpectObjectComma, stateExpectColon, stateExpectValue
|
||||
|
||||
JsonParser* = object of BaseLexer ## the parser object.
|
||||
a*: string
|
||||
tok*: TokKind
|
||||
kind: JsonEventKind
|
||||
err: JsonError
|
||||
state: seq[ParserState]
|
||||
filename: string
|
||||
rawStringLiterals: bool
|
||||
|
||||
JsonKindError* = object of ValueError ## raised by the ``to`` macro if the
|
||||
## JSON kind is incorrect.
|
||||
JsonParsingError* = object of ValueError ## is raised for a JSON error
|
||||
|
||||
const
|
||||
errorMessages*: array[JsonError, string] = [
|
||||
"no error",
|
||||
"invalid token",
|
||||
"string expected",
|
||||
"':' expected",
|
||||
"',' expected",
|
||||
"']' expected",
|
||||
"'}' expected",
|
||||
"'\"' or \"'\" expected",
|
||||
"'*/' expected",
|
||||
"EOF expected",
|
||||
"expression expected"
|
||||
]
|
||||
tokToStr: array[TokKind, string] = [
|
||||
"invalid token",
|
||||
"EOF",
|
||||
"string literal",
|
||||
"int literal",
|
||||
"float literal",
|
||||
"true",
|
||||
"false",
|
||||
"null",
|
||||
"{", "}", "[", "]", ":", ","
|
||||
]
|
||||
|
||||
proc open*(my: var JsonParser, input: Stream, filename: string;
|
||||
rawStringLiterals = false) =
|
||||
## initializes the parser with an input stream. `Filename` is only used
|
||||
## for nice error messages. If `rawStringLiterals` is true, string literals
|
||||
## are kepts with their surrounding quotes and escape sequences in them are
|
||||
## left untouched too.
|
||||
lexbase.open(my, input)
|
||||
my.filename = filename
|
||||
my.state = @[stateStart]
|
||||
my.kind = jsonError
|
||||
my.a = ""
|
||||
my.rawStringLiterals = rawStringLiterals
|
||||
|
||||
proc close*(my: var JsonParser) {.inline.} =
|
||||
## closes the parser `my` and its associated input stream.
|
||||
lexbase.close(my)
|
||||
|
||||
proc str*(my: JsonParser): string {.inline.} =
|
||||
## returns the character data for the events: ``jsonInt``, ``jsonFloat``,
|
||||
## ``jsonString``
|
||||
assert(my.kind in {jsonInt, jsonFloat, jsonString})
|
||||
return my.a
|
||||
|
||||
proc getInt*(my: JsonParser): BiggestInt {.inline.} =
|
||||
## returns the number for the event: ``jsonInt``
|
||||
assert(my.kind == jsonInt)
|
||||
return parseBiggestInt(my.a)
|
||||
|
||||
proc getFloat*(my: JsonParser): float {.inline.} =
|
||||
## returns the number for the event: ``jsonFloat``
|
||||
assert(my.kind == jsonFloat)
|
||||
return parseFloat(my.a)
|
||||
|
||||
proc kind*(my: JsonParser): JsonEventKind {.inline.} =
|
||||
## returns the current event type for the JSON parser
|
||||
return my.kind
|
||||
|
||||
proc getColumn*(my: JsonParser): int {.inline.} =
|
||||
## get the current column the parser has arrived at.
|
||||
result = getColNumber(my, my.bufpos)
|
||||
|
||||
proc getLine*(my: JsonParser): int {.inline.} =
|
||||
## get the current line the parser has arrived at.
|
||||
result = my.lineNumber
|
||||
|
||||
proc getFilename*(my: JsonParser): string {.inline.} =
|
||||
## get the filename of the file that the parser processes.
|
||||
result = my.filename
|
||||
|
||||
proc errorMsg*(my: JsonParser): string =
|
||||
## returns a helpful error message for the event ``jsonError``
|
||||
assert(my.kind == jsonError)
|
||||
result = "$1($2, $3) Error: $4" % [
|
||||
my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]]
|
||||
|
||||
proc errorMsgExpected*(my: JsonParser, e: string): string =
|
||||
## returns an error message "`e` expected" in the same format as the
|
||||
## other error messages
|
||||
result = "$1($2, $3) Error: $4" % [
|
||||
my.filename, $getLine(my), $getColumn(my), e & " expected"]
|
||||
|
||||
proc handleHexChar(c: char, x: var int): bool =
|
||||
result = true # Success
|
||||
case c
|
||||
of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
|
||||
of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
|
||||
of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
|
||||
else: result = false # error
|
||||
|
||||
proc parseEscapedUTF16*(buf: cstring, pos: var int): int =
|
||||
result = 0
|
||||
#UTF-16 escape is always 4 bytes.
|
||||
for _ in 0..3:
|
||||
if handleHexChar(buf[pos], result):
|
||||
inc(pos)
|
||||
else:
|
||||
return -1
|
||||
|
||||
proc parseString(my: var JsonParser): TokKind =
|
||||
result = tkString
|
||||
var pos = my.bufpos + 1
|
||||
var buf = my.buf
|
||||
if my.rawStringLiterals:
|
||||
add(my.a, '"')
|
||||
while true:
|
||||
case buf[pos]
|
||||
of '\0':
|
||||
my.err = errQuoteExpected
|
||||
result = tkError
|
||||
break
|
||||
of '"':
|
||||
if my.rawStringLiterals:
|
||||
add(my.a, '"')
|
||||
inc(pos)
|
||||
break
|
||||
of '\\':
|
||||
if my.rawStringLiterals:
|
||||
add(my.a, '\\')
|
||||
case buf[pos+1]
|
||||
of '\\', '"', '\'', '/':
|
||||
add(my.a, buf[pos+1])
|
||||
inc(pos, 2)
|
||||
of 'b':
|
||||
add(my.a, '\b')
|
||||
inc(pos, 2)
|
||||
of 'f':
|
||||
add(my.a, '\f')
|
||||
inc(pos, 2)
|
||||
of 'n':
|
||||
add(my.a, '\L')
|
||||
inc(pos, 2)
|
||||
of 'r':
|
||||
add(my.a, '\C')
|
||||
inc(pos, 2)
|
||||
of 't':
|
||||
add(my.a, '\t')
|
||||
inc(pos, 2)
|
||||
of 'u':
|
||||
if my.rawStringLiterals:
|
||||
add(my.a, 'u')
|
||||
inc(pos, 2)
|
||||
var pos2 = pos
|
||||
var r = parseEscapedUTF16(buf, pos)
|
||||
if r < 0:
|
||||
my.err = errInvalidToken
|
||||
break
|
||||
# Deal with surrogates
|
||||
if (r and 0xfc00) == 0xd800:
|
||||
if buf[pos] != '\\' or buf[pos+1] != 'u':
|
||||
my.err = errInvalidToken
|
||||
break
|
||||
inc(pos, 2)
|
||||
var s = parseEscapedUTF16(buf, pos)
|
||||
if (s and 0xfc00) == 0xdc00 and s > 0:
|
||||
r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00))
|
||||
else:
|
||||
my.err = errInvalidToken
|
||||
break
|
||||
if my.rawStringLiterals:
|
||||
let length = pos - pos2
|
||||
for i in 1 .. length:
|
||||
if buf[pos2] in {'0'..'9', 'A'..'F', 'a'..'f'}:
|
||||
add(my.a, buf[pos2])
|
||||
inc pos2
|
||||
else:
|
||||
break
|
||||
else:
|
||||
add(my.a, toUTF8(Rune(r)))
|
||||
else:
|
||||
# don't bother with the error
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\c')
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\L')
|
||||
else:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos # store back
|
||||
|
||||
proc skip(my: var JsonParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
of '/':
|
||||
if buf[pos+1] == '/':
|
||||
# skip line comment:
|
||||
inc(pos, 2)
|
||||
while true:
|
||||
case buf[pos]
|
||||
of '\0':
|
||||
break
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
break
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
break
|
||||
else:
|
||||
inc(pos)
|
||||
elif buf[pos+1] == '*':
|
||||
# skip long comment:
|
||||
inc(pos, 2)
|
||||
while true:
|
||||
case buf[pos]
|
||||
of '\0':
|
||||
my.err = errEOC_Expected
|
||||
break
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
of '*':
|
||||
inc(pos)
|
||||
if buf[pos] == '/':
|
||||
inc(pos)
|
||||
break
|
||||
else:
|
||||
inc(pos)
|
||||
else:
|
||||
break
|
||||
of ' ', '\t':
|
||||
inc(pos)
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
else:
|
||||
break
|
||||
my.bufpos = pos
|
||||
|
||||
proc parseNumber(my: var JsonParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if buf[pos] == '-':
|
||||
add(my.a, '-')
|
||||
inc(pos)
|
||||
if buf[pos] == '.':
|
||||
add(my.a, "0.")
|
||||
inc(pos)
|
||||
else:
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] == '.':
|
||||
add(my.a, '.')
|
||||
inc(pos)
|
||||
# digits after the dot:
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] in {'E', 'e'}:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] in {'+', '-'}:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
|
||||
proc parseName(my: var JsonParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if buf[pos] in IdentStartChars:
|
||||
while buf[pos] in IdentChars:
|
||||
add(my.a, buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
|
||||
proc getTok*(my: var JsonParser): TokKind =
|
||||
setLen(my.a, 0)
|
||||
skip(my) # skip whitespace, comments
|
||||
case my.buf[my.bufpos]
|
||||
of '-', '.', '0'..'9':
|
||||
parseNumber(my)
|
||||
if {'.', 'e', 'E'} in my.a:
|
||||
result = tkFloat
|
||||
else:
|
||||
result = tkInt
|
||||
of '"':
|
||||
result = parseString(my)
|
||||
of '[':
|
||||
inc(my.bufpos)
|
||||
result = tkBracketLe
|
||||
of '{':
|
||||
inc(my.bufpos)
|
||||
result = tkCurlyLe
|
||||
of ']':
|
||||
inc(my.bufpos)
|
||||
result = tkBracketRi
|
||||
of '}':
|
||||
inc(my.bufpos)
|
||||
result = tkCurlyRi
|
||||
of ',':
|
||||
inc(my.bufpos)
|
||||
result = tkComma
|
||||
of ':':
|
||||
inc(my.bufpos)
|
||||
result = tkColon
|
||||
of '\0':
|
||||
result = tkEof
|
||||
of 'a'..'z', 'A'..'Z', '_':
|
||||
parseName(my)
|
||||
case my.a
|
||||
of "null": result = tkNull
|
||||
of "true": result = tkTrue
|
||||
of "false": result = tkFalse
|
||||
else: result = tkError
|
||||
else:
|
||||
inc(my.bufpos)
|
||||
result = tkError
|
||||
my.tok = result
|
||||
|
||||
|
||||
proc next*(my: var JsonParser) =
|
||||
## retrieves the first/next event. This controls the parser.
|
||||
var tk = getTok(my)
|
||||
var i = my.state.len-1
|
||||
# the following code is a state machine. If we had proper coroutines,
|
||||
# the code could be much simpler.
|
||||
case my.state[i]
|
||||
of stateEof:
|
||||
if tk == tkEof:
|
||||
my.kind = jsonEof
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errEofExpected
|
||||
of stateStart:
|
||||
# tokens allowed?
|
||||
case tk
|
||||
of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull:
|
||||
my.state[i] = stateEof # expect EOF next!
|
||||
my.kind = JsonEventKind(ord(tk))
|
||||
of tkBracketLe:
|
||||
my.state.add(stateArray) # we expect any
|
||||
my.kind = jsonArrayStart
|
||||
of tkCurlyLe:
|
||||
my.state.add(stateObject)
|
||||
my.kind = jsonObjectStart
|
||||
of tkEof:
|
||||
my.kind = jsonEof
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errEofExpected
|
||||
of stateObject:
|
||||
case tk
|
||||
of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull:
|
||||
my.state.add(stateExpectColon)
|
||||
my.kind = JsonEventKind(ord(tk))
|
||||
of tkBracketLe:
|
||||
my.state.add(stateExpectColon)
|
||||
my.state.add(stateArray)
|
||||
my.kind = jsonArrayStart
|
||||
of tkCurlyLe:
|
||||
my.state.add(stateExpectColon)
|
||||
my.state.add(stateObject)
|
||||
my.kind = jsonObjectStart
|
||||
of tkCurlyRi:
|
||||
my.kind = jsonObjectEnd
|
||||
discard my.state.pop()
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errCurlyRiExpected
|
||||
of stateArray:
|
||||
case tk
|
||||
of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull:
|
||||
my.state.add(stateExpectArrayComma) # expect value next!
|
||||
my.kind = JsonEventKind(ord(tk))
|
||||
of tkBracketLe:
|
||||
my.state.add(stateExpectArrayComma)
|
||||
my.state.add(stateArray)
|
||||
my.kind = jsonArrayStart
|
||||
of tkCurlyLe:
|
||||
my.state.add(stateExpectArrayComma)
|
||||
my.state.add(stateObject)
|
||||
my.kind = jsonObjectStart
|
||||
of tkBracketRi:
|
||||
my.kind = jsonArrayEnd
|
||||
discard my.state.pop()
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errBracketRiExpected
|
||||
of stateExpectArrayComma:
|
||||
case tk
|
||||
of tkComma:
|
||||
discard my.state.pop()
|
||||
next(my)
|
||||
of tkBracketRi:
|
||||
my.kind = jsonArrayEnd
|
||||
discard my.state.pop() # pop stateExpectArrayComma
|
||||
discard my.state.pop() # pop stateArray
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errBracketRiExpected
|
||||
of stateExpectObjectComma:
|
||||
case tk
|
||||
of tkComma:
|
||||
discard my.state.pop()
|
||||
next(my)
|
||||
of tkCurlyRi:
|
||||
my.kind = jsonObjectEnd
|
||||
discard my.state.pop() # pop stateExpectObjectComma
|
||||
discard my.state.pop() # pop stateObject
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errCurlyRiExpected
|
||||
of stateExpectColon:
|
||||
case tk
|
||||
of tkColon:
|
||||
my.state[i] = stateExpectValue
|
||||
next(my)
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errColonExpected
|
||||
of stateExpectValue:
|
||||
case tk
|
||||
of tkString, tkInt, tkFloat, tkTrue, tkFalse, tkNull:
|
||||
my.state[i] = stateExpectObjectComma
|
||||
my.kind = JsonEventKind(ord(tk))
|
||||
of tkBracketLe:
|
||||
my.state[i] = stateExpectObjectComma
|
||||
my.state.add(stateArray)
|
||||
my.kind = jsonArrayStart
|
||||
of tkCurlyLe:
|
||||
my.state[i] = stateExpectObjectComma
|
||||
my.state.add(stateObject)
|
||||
my.kind = jsonObjectStart
|
||||
else:
|
||||
my.kind = jsonError
|
||||
my.err = errExprExpected
|
||||
|
||||
proc raiseParseErr*(p: JsonParser, msg: string) {.noinline, noreturn.} =
|
||||
## raises an `EJsonParsingError` exception.
|
||||
raise newException(JsonParsingError, errorMsgExpected(p, msg))
|
||||
|
||||
proc eat*(p: var JsonParser, tok: TokKind) =
|
||||
if p.tok == tok: discard getTok(p)
|
||||
else: raiseParseErr(p, tokToStr[tok])
|
||||
Reference in New Issue
Block a user