mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-28 17:04:41 +00:00
Merge branch 'master' of github.com:Araq/Nimrod
This commit is contained in:
726
lib/pure/json.nim
Executable file → Normal file
726
lib/pure/json.nim
Executable file → Normal file
@@ -1,484 +1,284 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2010 Andreas Rumpf
|
||||
# (c) Copyright 2010 Andreas Rumpf, Dominik Picheta
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## This module implements a simple high performance `JSON`:idx:
|
||||
## parser. JSON (JavaScript Object Notation) is a lightweight
|
||||
## data-interchange format that is easy for humans to read and write
|
||||
## (unlike XML). It is easy for machines to parse and generate.
|
||||
## JSON is based on a subset of the JavaScript Programming Language,
|
||||
## Standard ECMA-262 3rd Edition - December 1999.
|
||||
import parsejson, streams, strutils
|
||||
|
||||
import
|
||||
hashes, strutils, lexbase, streams, unicode
|
||||
|
||||
type
  TJsonEventKind* = enum ## enumeration of all events that may occur when parsing
    jsonError,           ## an error occurred during parsing
    jsonEof,             ## end of file reached
    jsonString,          ## a string literal
    jsonNumber,          ## a number literal
    jsonTrue,            ## the value ``true``
    jsonFalse,           ## the value ``false``
    jsonNull,            ## the value ``null``
    jsonObjectStart,     ## start of an object: the ``{`` token
    jsonObjectEnd,       ## end of an object: the ``}`` token
    jsonArrayStart,      ## start of an array: the ``[`` token
    jsonArrayEnd         ## end of an array: the ``]`` token

  TJsonNodeKind* = enum  ## discriminator of `TJsonNode`
    JString,
    JNumber,
    JBool,
    JNull,
    JObject,
    JArray

  TTokKind = enum        # must be synchronized with TJsonEventKind!
    tkError,
    tkEof,
    tkString,
    tkNumber,
    tkTrue,
    tkFalse,
    tkNull,
    tkCurlyLe,
    tkCurlyRi,
    tkBracketLe,
    tkBracketRi,
    tkColon,
    tkComma

  TJsonError* = enum       ## enumeration that lists all errors that can occur
    errNone,               ## no error
    errInvalidToken,       ## invalid token
    errStringExpected,     ## string expected
    errColonExpected,      ## ``:`` expected
    errCommaExpected,      ## ``,`` expected
    errBracketRiExpected,  ## ``]`` expected
    errCurlyRiExpected,    ## ``}`` expected
    errQuoteExpected,      ## ``"`` or ``'`` expected
    errEOC_Expected,       ## ``*/`` expected
    errEofExpected,        ## EOF expected
    errExprExpected        ## expr expected

  TParserState = enum      # entries of the parser's state stack
    stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma,
    stateExpectObjectComma, stateExpectColon, stateExpectValue

  PJsonNode* = ref TJsonNode
  TJsonNode* = object      ## a node of a JSON tree; `kind` selects the payload
    case kind*: TJsonNodeKind
    of JString:
      str*: string
    of JNumber:
      num*: float
    of JBool:
      bval*: bool
    of JNull:
      nil
    of JObject:
      fields*: seq[tuple[key: string, obj: PJsonNode]]
    of JArray:
      elems*: seq[PJsonNode]

  TJsonParser* = object of TBaseLexer ## the parser object.
    a: string                  # text of the current token
    kind: TJsonEventKind       # event produced by the last `next`
    err: TJsonError            # error code, meaningful for `jsonError`
    state: seq[TParserState]   # state stack driven by `next`
    filename: string           # only used when formatting messages

  EJsonParsingError* = object of EBase ## raised when building a node tree fails

const
  # One message per `TJsonError` value, in declaration order.
  errorMessages: array [TJsonError, string] = [
    "no error",
    "invalid token",
    "string expected",
    "':' expected",
    "',' expected",
    "']' expected",
    "'}' expected",
    "'\"' or \"'\" expected",
    "'*/' expected",
    "EOF expected",
    "expression expected"
  ]
|
||||
|
||||
proc open*(my: var TJsonParser, input: PStream, filename: string) =
  ## Attaches the parser `my` to the stream `input` and resets its state.
  ## `filename` is stored and later shown in error messages.
  lexbase.open(my, input)
  my.a = ""
  my.kind = jsonError
  my.state = @[stateStart]
  my.filename = filename
|
||||
|
||||
proc close*(my: var TJsonParser) {.inline.} =
  ## Shuts the parser `my` down by closing the underlying lexer.
  lexbase.close(my)
|
||||
|
||||
proc str*(my: TJsonParser): string {.inline.} =
  ## Text of the current token; only valid for the events ``jsonNumber``
  ## and ``jsonString`` (checked with an assertion).
  assert(my.kind in {jsonNumber, jsonString})
  result = my.a
|
||||
|
||||
proc number*(my: TJsonParser): float {.inline.} =
  ## Numeric value of the current token; only valid for the event
  ## ``jsonNumber`` (checked with an assertion).
  assert(my.kind == jsonNumber)
  result = parseFloat(my.a)
|
||||
|
||||
proc kind*(my: TJsonParser): TJsonEventKind {.inline.} =
  ## The event produced by the most recent call to `next`.
  result = my.kind
|
||||
|
||||
proc getColumn*(my: TJsonParser): int {.inline.} =
  ## Column of the current buffer position, as computed by the lexer.
  return getColNumber(my, my.bufPos)
|
||||
|
||||
proc getLine*(my: TJsonParser): int {.inline.} =
  ## Line number the lexer has reached.
  return my.linenumber
|
||||
|
||||
proc getFilename*(my: TJsonParser): string {.inline.} =
  ## The file name that was passed to `open`.
  return my.filename
|
||||
|
||||
proc errorMsg*(my: TJsonParser): string =
  ## Formats the pending error as ``file(line, column) Error: message``.
  ## Only valid for the event ``jsonError`` (checked with an assertion).
  assert(my.kind == jsonError)
  var reason = errorMessages[my.err]
  var line = $getLine(my)
  var column = $getColumn(my)
  result = "$1($2, $3) Error: $4" % [my.filename, line, column, reason]
|
||||
|
||||
proc errorMsgExpected*(my: TJsonParser, e: string): string =
  ## Formats "`e` expected" with the same ``file(line, column) Error:``
  ## prefix that `errorMsg` uses.
  var line = $getLine(my)
  var column = $getColumn(my)
  var reason = e & " expected"
  result = "$1($2, $3) Error: $4" % [my.filename, line, column, reason]
|
||||
|
||||
proc handleHexChar(c: char, x: var int): bool =
  ## Folds the hexadecimal digit `c` into `x` (``x = x * 16 + digit``) and
  ## returns true. If `c` is not a hex digit, `x` is left untouched and
  ## false is returned.
  # The parameter type is spelled `char`: the capitalised `Char` only
  # resolves when identifiers are compared fully case-insensitively.
  result = true
  case c
  of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
  of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
  of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
  else: result = false
|
||||
|
||||
proc parseString(my: var TJsonParser): TTokKind =
  ## Lexes a double-quoted string starting at the current buffer position
  ## and appends its decoded content to `my.a`. Returns `tkString`, or
  ## `tkError` (with `my.err = errQuoteExpected`) when the input ends before
  ## the closing quote.
  var p = my.bufpos + 1 # step over the opening quote
  var data = my.buf
  result = tkString
  while true:
    var c = data[p]
    if c == '"':
      inc(p)
      break
    elif c == '\0':
      my.err = errQuoteExpected
      result = tkError
      break
    elif c == '\c':
      # line ends inside a string are kept; the lexer may swap the buffer
      p = lexbase.handleCR(my, p)
      data = my.buf
      add(my.a, '\c')
    elif c == '\L':
      p = lexbase.handleLF(my, p)
      data = my.buf
      add(my.a, '\L')
    elif c != '\\':
      add(my.a, c)
      inc(p)
    else:
      var esc = data[p+1]
      case esc
      of '\\', '"', '\'', '/':
        add(my.a, esc)
        inc(p, 2)
      of 'b':
        add(my.a, '\b')
        inc(p, 2)
      of 'f':
        add(my.a, '\f')
        inc(p, 2)
      of 'n':
        add(my.a, '\L')
        inc(p, 2)
      of 'r':
        add(my.a, '\C')
        inc(p, 2)
      of 't':
        add(my.a, '\t')
        inc(p, 2)
      of 'u':
        inc(p, 2)
        var code: int
        # up to four hex digits; a non-digit is simply not consumed
        for k in 1..4:
          if handleHexChar(data[p], code): inc(p)
        add(my.a, toUTF8(TRune(code)))
      else:
        # unknown escape: keep the backslash, no error is reported
        add(my.a, c)
        inc(p)
  my.bufpos = p # store back
|
||||
|
||||
proc skip(my: var TJsonParser) =
  ## Advances the buffer position past blanks, tabs, line ends, ``//`` line
  ## comments and ``/* */`` long comments. An unterminated long comment sets
  ## `my.err` to `errEOC_Expected`.
  var p = my.bufpos
  var data = my.buf
  while true:
    var c = data[p]
    if c == ' ' or c == '\t':
      inc(p)
    elif c == '\c':
      p = lexbase.handleCR(my, p)
      data = my.buf
    elif c == '\L':
      p = lexbase.handleLF(my, p)
      data = my.buf
    elif c == '/' and data[p+1] == '/':
      # line comment: runs up to and including the line end
      inc(p, 2)
      while true:
        case data[p]
        of '\0':
          break
        of '\c':
          p = lexbase.handleCR(my, p)
          data = my.buf
          break
        of '\L':
          p = lexbase.handleLF(my, p)
          data = my.buf
          break
        else:
          inc(p)
    elif c == '/' and data[p+1] == '*':
      # long comment: runs up to the closing ``*/``
      inc(p, 2)
      while true:
        case data[p]
        of '\0':
          my.err = errEOC_Expected
          break
        of '\c':
          p = lexbase.handleCR(my, p)
          data = my.buf
        of '\L':
          p = lexbase.handleLF(my, p)
          data = my.buf
        of '*':
          inc(p)
          if data[p] == '/':
            inc(p)
            break
        else:
          inc(p)
    else:
      # anything else (including a lone '/') starts a token
      break
  my.bufpos = p
|
||||
|
||||
proc scanDigits(my: var TJsonParser, p: var int) =
  # Appends the run of decimal digits at buffer position `p` to `my.a`.
  while my.buf[p] in Digits:
    add(my.a, my.buf[p])
    inc(p)

proc parseNumber(my: var TJsonParser) =
  ## Lexes a number literal into `my.a`: optional ``-``, integer part,
  ## optional fraction, optional exponent. A literal that starts with ``.``
  ## is stored with a leading ``0``.
  var p = my.bufpos
  if my.buf[p] == '-':
    add(my.a, '-')
    inc(p)
  if my.buf[p] == '.':
    add(my.a, "0.")
    inc(p)
  else:
    scanDigits(my, p)
    if my.buf[p] == '.':
      add(my.a, '.')
      inc(p)
  # digits after the dot:
  scanDigits(my, p)
  if my.buf[p] in {'E', 'e'}:
    add(my.a, my.buf[p])
    inc(p)
    if my.buf[p] in {'+', '-'}:
      add(my.a, my.buf[p])
      inc(p)
    scanDigits(my, p)
  my.bufpos = p

proc raiseParseErr(parser: TJsonParser, msg: string, line = true) =
  ## Raises `EJsonParsingError` carrying `msg`. When `line` is true the
  ## message is prefixed with the parser's ``(line, column)``; callers pass
  ## false for messages that already contain a position.
  if line:
    raise newException(EJsonParsingError, "(" & $parser.getLine & ", " &
                       $parser.getColumn & ") " & msg)
  else:
    raise newException(EJsonParsingError, msg)
|
||||
|
||||
proc parseName(my: var TJsonParser) =
  ## Appends the identifier at the current buffer position to `my.a` and
  ## moves past it. Nothing is consumed when the first character cannot
  ## start an identifier.
  var p = my.bufpos
  if my.buf[p] in IdentStartChars:
    while my.buf[p] in IdentChars:
      add(my.a, my.buf[p])
      inc(p)
  my.bufpos = p
|
||||
proc indent(s: var string, i: int) =
  ## Appends `i` spaces to `s`; a count of zero or less appends nothing.
  # Spaces are added in place, so no temporary string is built and a
  # non-positive count is harmless.
  for k in 1..i:
    s.add(' ')
|
||||
|
||||
proc getTok(my: var TJsonParser): TTokKind =
  ## Skips whitespace and comments, then lexes one token. The token text
  ## (for numbers, strings and names) is left in `my.a`.
  setLen(my.a, 0)
  skip(my) # skip whitespace, comments
  var c = my.buf[my.bufpos]
  case c
  of '[', ']', '{', '}', ',', ':':
    # single-character punctuation
    inc(my.bufpos)
    case c
    of '[': result = tkBracketLe
    of ']': result = tkBracketRi
    of '{': result = tkCurlyLe
    of '}': result = tkCurlyRi
    of ',': result = tkComma
    else: result = tkColon
  of '"':
    result = parseString(my)
  of '-', '.', '0'..'9':
    parseNumber(my)
    result = tkNumber
  of 'a'..'z', 'A'..'Z', '_':
    parseName(my)
    if my.a == "null": result = tkNull
    elif my.a == "true": result = tkTrue
    elif my.a == "false": result = tkFalse
    else: result = tkError
  of '\0':
    # end of input; the position is not advanced
    result = tkEof
  else:
    inc(my.bufpos)
    result = tkError
|
||||
proc newIndent(curr, indent: int, ml: bool): int =
  ## Indentation for the next nesting level: ``curr + indent`` in
  ## multi-line mode (`ml`), otherwise just `indent`.
  # The return type is spelled `int`: the capitalised `Int` only resolves
  # when identifiers are compared fully case-insensitively.
  result = indent
  if ml: inc(result, curr)
|
||||
|
||||
proc signalError(my: var TJsonParser, e: TJsonError) =
  # Switches the parser to the error event and records the error code.
  my.kind = jsonError
  my.err = e

proc next*(my: var TJsonParser) =
  ## Retrieves the first/next event; this drives the parser. The top of
  ## the state stack decides which tokens are acceptable and which event
  ## (or error) results.
  const scalars = {tkString, tkNumber, tkTrue, tkFalse, tkNull}
  var tok = getTok(my)
  var top = high(my.state)
  case my.state[top]
  of stateEof:
    if tok != tkEof: signalError(my, errEofExpected)
    else: my.kind = jsonEof
  of stateStart:
    if tok in scalars:
      my.state[top] = stateEof # expect EOF next!
      my.kind = TJsonEventKind(ord(tok))
    elif tok == tkBracketLe:
      my.state.add(stateArray)
      my.kind = jsonArrayStart
    elif tok == tkCurlyLe:
      my.state.add(stateObject)
      my.kind = jsonObjectStart
    elif tok == tkEof:
      my.kind = jsonEof
    else:
      signalError(my, errEofExpected)
  of stateObject:
    if tok in scalars:
      my.state.add(stateExpectColon)
      my.kind = TJsonEventKind(ord(tok))
    elif tok == tkBracketLe:
      my.state.add(stateExpectColon)
      my.state.add(stateArray)
      my.kind = jsonArrayStart
    elif tok == tkCurlyLe:
      my.state.add(stateExpectColon)
      my.state.add(stateObject)
      my.kind = jsonObjectStart
    elif tok == tkCurlyRi:
      my.kind = jsonObjectEnd
      discard my.state.pop()
    else:
      signalError(my, errCurlyRiExpected)
  of stateArray:
    if tok in scalars:
      my.state.add(stateExpectArrayComma)
      my.kind = TJsonEventKind(ord(tok))
    elif tok == tkBracketLe:
      my.state.add(stateExpectArrayComma)
      my.state.add(stateArray)
      my.kind = jsonArrayStart
    elif tok == tkCurlyLe:
      my.state.add(stateExpectArrayComma)
      my.state.add(stateObject)
      my.kind = jsonObjectStart
    elif tok == tkBracketRi:
      my.kind = jsonArrayEnd
      discard my.state.pop()
    else:
      signalError(my, errBracketRiExpected)
  of stateExpectArrayComma:
    if tok == tkComma:
      # the comma itself is no event: drop the state and fetch the element
      discard my.state.pop()
      next(my)
    elif tok == tkBracketRi:
      my.kind = jsonArrayEnd
      discard my.state.pop() # pop stateExpectArrayComma
      discard my.state.pop() # pop stateArray
    else:
      signalError(my, errBracketRiExpected)
  of stateExpectObjectComma:
    if tok == tkComma:
      discard my.state.pop()
      next(my)
    elif tok == tkCurlyRi:
      my.kind = jsonObjectEnd
      discard my.state.pop() # pop stateExpectObjectComma
      discard my.state.pop() # pop stateObject
    else:
      signalError(my, errCurlyRiExpected)
  of stateExpectColon:
    if tok == tkColon:
      # the colon itself is no event: continue with the value
      my.state[top] = stateExpectValue
      next(my)
    else:
      signalError(my, errColonExpected)
  of stateExpectValue:
    if tok in scalars:
      my.state[top] = stateExpectObjectComma
      my.kind = TJsonEventKind(ord(tok))
    elif tok == tkBracketLe:
      my.state[top] = stateExpectObjectComma
      my.state.add(stateArray)
      my.kind = jsonArrayStart
    elif tok == tkCurlyLe:
      my.state[top] = stateExpectObjectComma
      my.state.add(stateObject)
      my.kind = jsonObjectStart
    else:
      signalError(my, errExprExpected)
|
||||
proc nl(s: var string, ml: bool) =
  ## Appends a line feed to `s`, but only in multi-line mode (`ml`).
  if not ml: return
  s.add("\n")
|
||||
|
||||
proc escapeJsonString(s: string): string =
  # Returns `s` as a double-quoted JSON string literal. Quotes, backslashes
  # and control characters are escaped so that the output parses again.
  result = "\""
  for c in items(s):
    case c
    of '"': result.add("\\\"")
    of '\\': result.add("\\\\")
    of '\b': result.add("\\b")
    of '\f': result.add("\\f")
    of '\L': result.add("\\n")
    of '\c': result.add("\\r")
    of '\t': result.add("\\t")
    else:
      if ord(c) < 32: result.add("\\u" & toHex(ord(c), 4))
      else: result.add(c)
  result.add("\"")

proc toPretty(result: var string, node: PJsonNode, indent = 2, ml = true,
              lstArr = false, currIndent = 0) =
  ## Appends the JSON text of `node` to `result`.
  ##
  ## * `indent`: spaces added per nesting level
  ## * `ml`: multi-line mode; when false no line feeds are written
  ## * `lstArr`: true when `node` is an array element, which then writes
  ##   its own indentation
  ## * `currIndent`: indentation of the enclosing level
  ##
  ## String values and object keys are escaped, so data containing ``"``,
  ## ``\`` or control characters still yields valid JSON.
  case node.kind
  of JObject:
    if currIndent != 0 and not lstArr: result.nl(ml)
    result.indent(currIndent) # Indentation
    result.add("{")
    result.nl(ml) # New line
    for i in 0..len(node.fields)-1:
      if i > 0:
        result.add(", ")
        result.nl(ml) # New Line
      var (key, item) = node.fields[i]
      # fields are indented one level deeper than the opening brace
      result.indent(newIndent(currIndent, indent, ml))
      result.add(escapeJsonString(key) & ": ")
      toPretty(result, item, indent, ml, false,
               newIndent(currIndent, indent, ml))
    result.nl(ml)
    result.indent(currIndent) # indent the same as {
    result.add("}")
  of JString:
    if lstArr: result.indent(currIndent)
    result.add(escapeJsonString(node.str))
  of JNumber:
    if lstArr: result.indent(currIndent)
    result.add($node.num)
  of JBool:
    if lstArr: result.indent(currIndent)
    result.add($node.bval)
  of JArray:
    if len(node.elems) != 0:
      result.add("[")
      result.nl(ml)
      for i in 0..len(node.elems)-1:
        if i > 0:
          result.add(", ")
          result.nl(ml) # New Line
        toPretty(result, node.elems[i], indent, ml,
                 true, newIndent(currIndent, indent, ml))
      result.nl(ml)
      result.indent(currIndent)
      result.add("]")
    else: result.add("[]")
  of JNull:
    if lstArr: result.indent(currIndent)
    result.add("null")
|
||||
|
||||
proc pretty*(node: PJsonNode, indent = 2): String =
  ## Renders `node` as JSON text spread over multiple lines, using
  ## `indent` spaces per nesting level.
  var buf = ""
  toPretty(buf, node, indent)
  return buf
|
||||
|
||||
proc `$`*(node: PJsonNode): String =
  ## Renders `node` as JSON text on a single line.
  var buf = ""
  toPretty(buf, node, 1, false)
  return buf
|
||||
|
||||
proc newJString*(s: String): PJsonNode =
  ## Allocates a `JString` node holding `s`.
  var node: PJsonNode
  new(node)
  node.kind = JString
  node.str = s
  return node
|
||||
|
||||
proc newJNumber*(n: Float): PJsonNode =
  ## Allocates a `JNumber` node holding `n`.
  var node: PJsonNode
  new(node)
  node.kind = JNumber
  node.num = n
  return node
|
||||
|
||||
when isMainModule:
|
||||
import os
|
||||
var s = newFileStream(ParamStr(1), fmRead)
|
||||
if s == nil: quit("cannot open the file" & ParamStr(1))
|
||||
var x: TJsonParser
|
||||
open(x, s, ParamStr(1))
|
||||
while true:
|
||||
next(x)
|
||||
case x.kind
|
||||
proc newJBool*(b: Bool): PJsonNode =
  ## Allocates a `JBool` node holding `b`.
  var node: PJsonNode
  new(node)
  node.kind = JBool
  node.bval = b
  return node
|
||||
|
||||
proc newJNull*(): PJsonNode =
  ## Allocates a `JNull` node.
  var node: PJsonNode
  new(node)
  node.kind = JNull
  return node
|
||||
|
||||
proc newJObject*(f: seq[tuple[key: string, obj: PJsonNode]]): PJsonNode =
  ## Allocates a `JObject` node whose fields are the key/node pairs `f`.
  var node: PJsonNode
  new(node)
  node.kind = JObject
  node.fields = f
  return node
|
||||
|
||||
proc newJArray*(a: seq[PJsonNode]): PJsonNode =
  ## Allocates a `JArray` node whose elements are `a`.
  var node: PJsonNode
  new(node)
  node.kind = JArray
  node.elems = a
  return node
|
||||
|
||||
proc parseOther(parser: var TJsonParser): PJsonNode =
  ## Builds the node for the parser's current event when that event is a
  ## *single* value (string, number, ``true``/``false``, ``null``).
  ## Raises `EJsonParsingError` for an error event or any other event.
  # No node is allocated up front: every branch either assigns a freshly
  # constructed node or raises.
  case parser.kind
  of jsonString:
    result = newJString(parser.str())
  of jsonNumber:
    result = newJNumber(parser.number())
  of jsonTrue, jsonFalse:
    result = newJBool(parser.kind == jsonTrue)
  of jsonNull:
    result = newJNull()
  of jsonError:
    # errorMsg already carries the position
    parser.raiseParseErr(parser.errorMsg(), false)
  else: parser.raiseParseErr("Unexpected " & $parser.kind & " here.")
|
||||
|
||||
proc parseObj(parser: var TJSonParser, oStart: Bool = False): PJsonNode
|
||||
|
||||
proc parseArray(parser: var TJsonParser): PJsonNode =
  ## Collects events into a `JArray` node until the matching
  ## ``jsonArrayEnd`` arrives. Expects the opening ``[`` to have been
  ## consumed already. Raises `EJsonParsingError` on ``}``, EOF or a
  ## parser error.
  result = newJArray(@[])
  while true:
    parser.next()
    case parser.kind
    of jsonArrayEnd:
      break
    of jsonArrayStart:
      # Array in an array.
      result.elems.add(parser.parseArray())
    of jsonObjectStart:
      result.elems.add(parser.parseObj(true))
    of jsonString, jsonNumber, jsonTrue, jsonFalse, jsonNull:
      result.elems.add(parser.parseOther())
    of jsonObjectEnd: parser.raiseParseErr("Unexpected }")
    of jsonEof: parser.raiseParseErr("Unexpected EOF.")
    of jsonError: parser.raiseParseErr(parser.errorMsg(), false)
|
||||
|
||||
proc parseObj(parser: var TJSonParser, oStart: Bool = False): PJsonNode =
  ## Collects events into a `JObject` node until ``jsonObjectEnd`` or EOF.
  ## `oStart` tells whether the opening ``{`` has already been consumed.
  ## If an array starts where no key is pending, that array is parsed and
  ## returned instead. Raises `EJsonParsingError` on a parser error, on a
  ## value without a key, and on a stray ``]``.
  var key = ""
  # Tracked separately from `key` so that the empty string is a valid key:
  # ``{"": 1}`` must not be mistaken for "no key seen yet".
  var haveKey = false
  var objStarted = oStart
  result = newJObject(@[])
  while true:
    parser.next()
    case parser.kind
    of jsonError:
      parser.raiseParseErr(parser.errorMsg(), false)
    of jsonEof: break
    of jsonString, jsonNumber, jsonTrue, jsonFalse, jsonNull:
      if haveKey:
        var obj = parser.parseOther()
        result.fields.add((key, obj))
        haveKey = false
      elif parser.kind == jsonString and objStarted:
        key = parser.str()
        haveKey = true
      else:
        parser.raiseParseErr("Expected object or array.")
    of jsonObjectStart:
      objStarted = true
      if haveKey:
        # Make sure that parseObj knows that the object has been started
        var obj = parser.parseObj(true)
        result.fields.add((key, obj))
        haveKey = false
    of jsonObjectEnd: return
    of jsonArrayStart:
      var arr = parser.parseArray()
      if haveKey:
        result.fields.add((key, arr))
        haveKey = false
      else:
        return arr
    of jsonArrayEnd: parser.raiseParseErr("Unexpected ]")
|
||||
|
||||
proc parse*(json: string): PJsonNode =
  ## Parses string `json` into a `PJsonNode`.
  ## Raises `EJsonParsingError` when the text is not valid.
  var stream = newStringStream(json)
  var parser: TJsonParser
  parser.open(stream, "")
  try:
    result = parser.parseObj()
  finally:
    # release the parser even when parsing raised
    parser.close()
|
||||
|
||||
proc parseFile*(file: string): PJsonNode =
  ## Parses `file` into a `PJsonNode`.
  ## Raises `EIO` when the file cannot be opened and `EJsonParsingError`
  ## when its content is not valid.
  var stream = newFileStream(file, fmRead)
  # newFileStream yields nil for an unreadable file; report that instead of
  # handing a nil stream to the lexer
  if stream == nil:
    raise newException(EIO, "cannot read from file: " & file)
  var parser: TJsonParser
  parser.open(stream, file)
  try:
    result = parser.parseObj()
  finally:
    # release the parser (and with it the file) even when parsing raised
    parser.close()
|
||||
|
||||
proc `[]`*(node: PJsonNode, name: String): PJsonNode =
  ## Looks up the field `name` of a `JObject` (asserted). Yields the first
  ## field with that key, or nil when there is none.
  assert(node.kind == JObject)
  for i in 0..len(node.fields)-1:
    if node.fields[i].key == name:
      return node.fields[i].obj
  result = nil
|
||||
|
||||
proc `[]`*(node: PJsonNode, index: Int): PJsonNode =
  ## Yields the element at `index` of a `JArray` (asserted).
  assert(node.kind == JArray)
  result = node.elems[index]
|
||||
|
||||
proc existsKey*(node: PJsonNode, name: String): Bool =
  ## Tells whether the `JObject` `node` (asserted) has a field with the
  ## key `name`.
  assert(node.kind == JObject)
  result = false
  for i in 0..len(node.fields)-1:
    if node.fields[i].key == name:
      return true
|
||||
|
||||
|
||||
|
||||
# { "json": 5 }
|
||||
# To read that value, index the parsed node by key: obj["json"]
|
||||
|
||||
when isMainModule:
  # Manual smoke test: loads "test2.json" from the working directory,
  # prints one nested field, then the tree in multi-line and in
  # single-line form.
  #var node = parse("{ \"test\": null }")
  #echo(node.existsKey("test56"))
  var doc = parseFile("test2.json")
  echo(doc["commits"][0]["author"]["username"].str)
  echo()
  echo(pretty(doc, 2))
  echo()
  echo(doc)

  # disabled interactive loop, kept for reference
  discard """
  while true:
    var json = stdin.readLine()
    var node = parse(json)
    echo(node)
    echo()
    echo()
  """
|
||||
|
||||
484
lib/pure/parsejson.nim
Executable file
484
lib/pure/parsejson.nim
Executable file
@@ -0,0 +1,484 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2010 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## This module implements a simple high performance `JSON`:idx:
|
||||
## parser. JSON (JavaScript Object Notation) is a lightweight
|
||||
## data-interchange format that is easy for humans to read and write
|
||||
## (unlike XML). It is easy for machines to parse and generate.
|
||||
## JSON is based on a subset of the JavaScript Programming Language,
|
||||
## Standard ECMA-262 3rd Edition - December 1999.
|
||||
|
||||
import
|
||||
hashes, strutils, lexbase, streams, unicode
|
||||
|
||||
type
  TJsonEventKind* = enum ## enumeration of all events that may occur when parsing
    jsonError,           ## an error occurred during parsing
    jsonEof,             ## end of file reached
    jsonString,          ## a string literal
    jsonNumber,          ## a number literal
    jsonTrue,            ## the value ``true``
    jsonFalse,           ## the value ``false``
    jsonNull,            ## the value ``null``
    jsonObjectStart,     ## start of an object: the ``{`` token
    jsonObjectEnd,       ## end of an object: the ``}`` token
    jsonArrayStart,      ## start of an array: the ``[`` token
    jsonArrayEnd         ## end of an array: the ``]`` token

  TTokKind = enum        # must be synchronized with TJsonEventKind!
    tkError,
    tkEof,
    tkString,
    tkNumber,
    tkTrue,
    tkFalse,
    tkNull,
    tkCurlyLe,
    tkCurlyRi,
    tkBracketLe,
    tkBracketRi,
    tkColon,
    tkComma

  TJsonError* = enum       ## enumeration that lists all errors that can occur
    errNone,               ## no error
    errInvalidToken,       ## invalid token
    errStringExpected,     ## string expected
    errColonExpected,      ## ``:`` expected
    errCommaExpected,      ## ``,`` expected
    errBracketRiExpected,  ## ``]`` expected
    errCurlyRiExpected,    ## ``}`` expected
    errQuoteExpected,      ## ``"`` or ``'`` expected
    errEOC_Expected,       ## ``*/`` expected
    errEofExpected,        ## EOF expected
    errExprExpected        ## expr expected

  TParserState = enum      # entries of the parser's state stack
    stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma,
    stateExpectObjectComma, stateExpectColon, stateExpectValue

  TJsonParser* = object of TBaseLexer ## the parser object.
    a: string                  # text of the current token
    kind: TJsonEventKind       # event produced by the last `next`
    err: TJsonError            # error code, meaningful for `jsonError`
    state: seq[TParserState]   # state stack driven by `next`
    filename: string           # only used when formatting messages

const
  # One message per `TJsonError` value, in declaration order.
  errorMessages: array [TJsonError, string] = [
    "no error",
    "invalid token",
    "string expected",
    "':' expected",
    "',' expected",
    "']' expected",
    "'}' expected",
    "'\"' or \"'\" expected",
    "'*/' expected",
    "EOF expected",
    "expression expected"
  ]
|
||||
|
||||
proc open*(my: var TJsonParser, input: PStream, filename: string) =
  ## Attaches the parser `my` to the stream `input` and resets its state.
  ## `filename` is stored and later shown in error messages.
  lexbase.open(my, input)
  my.a = ""
  my.kind = jsonError
  my.state = @[stateStart]
  my.filename = filename
|
||||
|
||||
proc close*(my: var TJsonParser) {.inline.} =
  ## Shuts the parser `my` down by closing the underlying lexer.
  lexbase.close(my)
|
||||
|
||||
proc str*(my: TJsonParser): string {.inline.} =
  ## Text of the current token; only valid for the events ``jsonNumber``
  ## and ``jsonString`` (checked with an assertion).
  assert(my.kind in {jsonNumber, jsonString})
  result = my.a
|
||||
|
||||
proc number*(my: TJsonParser): float {.inline.} =
  ## Numeric value of the current token; only valid for the event
  ## ``jsonNumber`` (checked with an assertion).
  assert(my.kind == jsonNumber)
  result = parseFloat(my.a)
|
||||
|
||||
proc kind*(my: TJsonParser): TJsonEventKind {.inline.} =
  ## The event produced by the most recent call to `next`.
  result = my.kind
|
||||
|
||||
proc getColumn*(my: TJsonParser): int {.inline.} =
  ## Column of the current buffer position, as computed by the lexer.
  return getColNumber(my, my.bufPos)
|
||||
|
||||
proc getLine*(my: TJsonParser): int {.inline.} =
  ## Line number the lexer has reached.
  return my.linenumber
|
||||
|
||||
proc getFilename*(my: TJsonParser): string {.inline.} =
  ## The file name that was passed to `open`.
  return my.filename
|
||||
|
||||
proc errorMsg*(my: TJsonParser): string =
  ## Formats the pending error as ``file(line, column) Error: message``.
  ## Only valid for the event ``jsonError`` (checked with an assertion).
  assert(my.kind == jsonError)
  var reason = errorMessages[my.err]
  var line = $getLine(my)
  var column = $getColumn(my)
  result = "$1($2, $3) Error: $4" % [my.filename, line, column, reason]
|
||||
|
||||
proc errorMsgExpected*(my: TJsonParser, e: string): string =
  ## Formats "`e` expected" with the same ``file(line, column) Error:``
  ## prefix that `errorMsg` uses.
  var line = $getLine(my)
  var column = $getColumn(my)
  var reason = e & " expected"
  result = "$1($2, $3) Error: $4" % [my.filename, line, column, reason]
|
||||
|
||||
proc handleHexChar(c: char, x: var int): bool =
  ## Folds the hexadecimal digit `c` into `x` (``x = x * 16 + digit``) and
  ## returns true. If `c` is not a hex digit, `x` is left untouched and
  ## false is returned.
  # The parameter type is spelled `char`: the capitalised `Char` only
  # resolves when identifiers are compared fully case-insensitively.
  result = true
  case c
  of '0'..'9': x = (x shl 4) or (ord(c) - ord('0'))
  of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10)
  of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10)
  else: result = false
|
||||
|
||||
proc parseString(my: var TJsonParser): TTokKind =
  ## Lexes a double-quoted string starting at the current buffer position
  ## and appends its decoded content to `my.a`. Returns `tkString`, or
  ## `tkError` (with `my.err = errQuoteExpected`) when the input ends before
  ## the closing quote.
  var p = my.bufpos + 1 # step over the opening quote
  var data = my.buf
  result = tkString
  while true:
    var c = data[p]
    if c == '"':
      inc(p)
      break
    elif c == '\0':
      my.err = errQuoteExpected
      result = tkError
      break
    elif c == '\c':
      # line ends inside a string are kept; the lexer may swap the buffer
      p = lexbase.handleCR(my, p)
      data = my.buf
      add(my.a, '\c')
    elif c == '\L':
      p = lexbase.handleLF(my, p)
      data = my.buf
      add(my.a, '\L')
    elif c != '\\':
      add(my.a, c)
      inc(p)
    else:
      var esc = data[p+1]
      case esc
      of '\\', '"', '\'', '/':
        add(my.a, esc)
        inc(p, 2)
      of 'b':
        add(my.a, '\b')
        inc(p, 2)
      of 'f':
        add(my.a, '\f')
        inc(p, 2)
      of 'n':
        add(my.a, '\L')
        inc(p, 2)
      of 'r':
        add(my.a, '\C')
        inc(p, 2)
      of 't':
        add(my.a, '\t')
        inc(p, 2)
      of 'u':
        inc(p, 2)
        var code: int
        # up to four hex digits; a non-digit is simply not consumed
        for k in 1..4:
          if handleHexChar(data[p], code): inc(p)
        add(my.a, toUTF8(TRune(code)))
      else:
        # unknown escape: keep the backslash, no error is reported
        add(my.a, c)
        inc(p)
  my.bufpos = p # store back
|
||||
|
||||
proc skip(my: var TJsonParser) =
  ## Advances the buffer position past blanks, tabs, line ends, ``//`` line
  ## comments and ``/* */`` long comments. An unterminated long comment sets
  ## `my.err` to `errEOC_Expected`.
  var p = my.bufpos
  var data = my.buf
  while true:
    var c = data[p]
    if c == ' ' or c == '\t':
      inc(p)
    elif c == '\c':
      p = lexbase.handleCR(my, p)
      data = my.buf
    elif c == '\L':
      p = lexbase.handleLF(my, p)
      data = my.buf
    elif c == '/' and data[p+1] == '/':
      # line comment: runs up to and including the line end
      inc(p, 2)
      while true:
        case data[p]
        of '\0':
          break
        of '\c':
          p = lexbase.handleCR(my, p)
          data = my.buf
          break
        of '\L':
          p = lexbase.handleLF(my, p)
          data = my.buf
          break
        else:
          inc(p)
    elif c == '/' and data[p+1] == '*':
      # long comment: runs up to the closing ``*/``
      inc(p, 2)
      while true:
        case data[p]
        of '\0':
          my.err = errEOC_Expected
          break
        of '\c':
          p = lexbase.handleCR(my, p)
          data = my.buf
        of '\L':
          p = lexbase.handleLF(my, p)
          data = my.buf
        of '*':
          inc(p)
          if data[p] == '/':
            inc(p)
            break
        else:
          inc(p)
    else:
      # anything else (including a lone '/') starts a token
      break
  my.bufpos = p
|
||||
|
||||
proc scanDigits(my: var TJsonParser, p: var int) =
  # Appends the run of decimal digits at buffer position `p` to `my.a`.
  while my.buf[p] in Digits:
    add(my.a, my.buf[p])
    inc(p)

proc parseNumber(my: var TJsonParser) =
  ## Lexes a number literal into `my.a`: optional ``-``, integer part,
  ## optional fraction, optional exponent. A literal that starts with ``.``
  ## is stored with a leading ``0``.
  var p = my.bufpos
  if my.buf[p] == '-':
    add(my.a, '-')
    inc(p)
  if my.buf[p] == '.':
    add(my.a, "0.")
    inc(p)
  else:
    scanDigits(my, p)
    if my.buf[p] == '.':
      add(my.a, '.')
      inc(p)
  # digits after the dot:
  scanDigits(my, p)
  if my.buf[p] in {'E', 'e'}:
    add(my.a, my.buf[p])
    inc(p)
    if my.buf[p] in {'+', '-'}:
      add(my.a, my.buf[p])
      inc(p)
    scanDigits(my, p)
  my.bufpos = p
|
||||
|
||||
proc parseName(my: var TJsonParser) =
  ## Appends the identifier at the current buffer position to `my.a` and
  ## moves past it. Nothing is consumed when the first character cannot
  ## start an identifier.
  var p = my.bufpos
  if my.buf[p] in IdentStartChars:
    while my.buf[p] in IdentChars:
      add(my.a, my.buf[p])
      inc(p)
  my.bufpos = p
|
||||
|
||||
proc getTok(my: var TJsonParser): TTokKind =
  ## Reads the next token and returns its kind.  ``my.a`` is emptied first,
  ## then whitespace and comments are skipped.  The sub-parsers
  ## (``parseNumber``, ``parseString``, ``parseName``) are the ones that
  ## fill ``my.a``.  Identifiers other than ``null``, ``true`` and ``false``
  ## and any unexpected character yield ``tkError``; an unexpected
  ## character is consumed, while ``'\0'`` (``tkEof``) is not.
  setLen(my.a, 0)
  skip(my) # skip whitespace, comments
  var c = my.buf[my.bufpos]
  case c
  of '"':
    result = parseString(my)
  of '-', '.', '0'..'9':
    parseNumber(my)
    result = tkNumber
  of 'a'..'z', 'A'..'Z', '_':
    parseName(my)
    # only the three JSON keywords are valid identifiers
    if my.a == "null":
      result = tkNull
    elif my.a == "true":
      result = tkTrue
    elif my.a == "false":
      result = tkFalse
    else:
      result = tkError
  of '\0':
    result = tkEof
  of '{':
    result = tkCurlyLe
    inc(my.bufpos)
  of '}':
    result = tkCurlyRi
    inc(my.bufpos)
  of '[':
    result = tkBracketLe
    inc(my.bufpos)
  of ']':
    result = tkBracketRi
    inc(my.bufpos)
  of ':':
    result = tkColon
    inc(my.bufpos)
  of ',':
    result = tkComma
    inc(my.bufpos)
  else:
    result = tkError
    inc(my.bufpos)
|
||||
|
||||
proc next*(my: var TJsonParser) =
  ## Retrieves the first/next event. This controls the parser.
  ##
  ## One token is read with ``getTok`` and interpreted according to the
  ## state on top of ``my.state``.  Afterwards ``my.kind`` holds the event.
  ## If the token is not allowed in the current state, ``my.kind`` is
  ## ``jsonError`` and ``my.err`` holds the matching error code.
  var tok = getTok(my)
  var top = my.state.len-1
  # string, number, true, false and null are handled alike in every state;
  # their event kind is obtained by converting the token's ordinal
  var isScalar = tok in {tkString, tkNumber, tkTrue, tkFalse, tkNull}
  # What follows is a hand-written state machine (with proper coroutines
  # this could be much simpler).
  case my.state[top]
  of stateEof:
    if tok != tkEof:
      my.kind = jsonError
      my.err = errEofExpected
    else:
      my.kind = jsonEof
  of stateStart:
    if isScalar:
      my.kind = TJsonEventKind(ord(tok))
      my.state[top] = stateEof # expect EOF next!
    elif tok == tkBracketLe:
      my.kind = jsonArrayStart
      my.state.add(stateArray)
    elif tok == tkCurlyLe:
      my.kind = jsonObjectStart
      my.state.add(stateObject)
    elif tok == tkEof:
      my.kind = jsonEof
    else:
      my.kind = jsonError
      my.err = errEofExpected
  of stateObject:
    # inside '{': a key (any value token is accepted here) or '}'
    if isScalar:
      my.kind = TJsonEventKind(ord(tok))
      my.state.add(stateExpectColon)
    elif tok == tkBracketLe:
      my.kind = jsonArrayStart
      my.state.add(stateExpectColon)
      my.state.add(stateArray)
    elif tok == tkCurlyLe:
      my.kind = jsonObjectStart
      my.state.add(stateExpectColon)
      my.state.add(stateObject)
    elif tok == tkCurlyRi:
      my.kind = jsonObjectEnd
      discard my.state.pop()
    else:
      my.kind = jsonError
      my.err = errCurlyRiExpected
  of stateArray:
    # inside '[': an element or ']'
    if isScalar:
      my.kind = TJsonEventKind(ord(tok))
      my.state.add(stateExpectArrayComma)
    elif tok == tkBracketLe:
      my.kind = jsonArrayStart
      my.state.add(stateExpectArrayComma)
      my.state.add(stateArray)
    elif tok == tkCurlyLe:
      my.kind = jsonObjectStart
      my.state.add(stateExpectArrayComma)
      my.state.add(stateObject)
    elif tok == tkBracketRi:
      my.kind = jsonArrayEnd
      discard my.state.pop()
    else:
      my.kind = jsonError
      my.err = errBracketRiExpected
  of stateExpectArrayComma:
    if tok == tkComma:
      # a comma produces no event of its own: drop this state and go on
      discard my.state.pop()
      next(my)
    elif tok == tkBracketRi:
      my.kind = jsonArrayEnd
      discard my.state.pop() # pop stateExpectArrayComma
      discard my.state.pop() # pop stateArray
    else:
      my.kind = jsonError
      my.err = errBracketRiExpected
  of stateExpectObjectComma:
    if tok == tkComma:
      # a comma produces no event of its own: drop this state and go on
      discard my.state.pop()
      next(my)
    elif tok == tkCurlyRi:
      my.kind = jsonObjectEnd
      discard my.state.pop() # pop stateExpectObjectComma
      discard my.state.pop() # pop stateObject
    else:
      my.kind = jsonError
      my.err = errCurlyRiExpected
  of stateExpectColon:
    if tok == tkColon:
      # the colon produces no event: switch state, then read the value
      my.state[top] = stateExpectValue
      next(my)
    else:
      my.kind = jsonError
      my.err = errColonExpected
  of stateExpectValue:
    if isScalar:
      my.kind = TJsonEventKind(ord(tok))
      my.state[top] = stateExpectObjectComma
    elif tok == tkBracketLe:
      my.kind = jsonArrayStart
      my.state[top] = stateExpectObjectComma
      my.state.add(stateArray)
    elif tok == tkCurlyLe:
      my.kind = jsonObjectStart
      my.state[top] = stateExpectObjectComma
      my.state.add(stateObject)
    else:
      my.kind = jsonError
      my.err = errExprExpected
|
||||
|
||||
when isMainModule:
  # Self-test driver: parses the JSON file named by the first command line
  # argument and echoes one line per parser event until an error or the
  # end of the input is reached.
  import os
  # refuse to run without a file argument instead of indexing a missing one
  if paramCount() < 1:
    quit("missing argument: path of the JSON file to parse")
  var fileName = paramStr(1)
  var s = newFileStream(fileName, fmRead)
  # the separating space keeps the file name readable in the message
  if s == nil: quit("cannot open the file " & fileName)
  var x: TJsonParser
  open(x, s, fileName)
  while true:
    next(x)
    case x.kind
    of jsonError:
      echo(x.errorMsg())
      break
    of jsonEof: break
    of jsonString, jsonNumber: echo(x.str)
    of jsonTrue: echo("!TRUE")
    of jsonFalse: echo("!FALSE")
    of jsonNull: echo("!NULL")
    of jsonObjectStart: echo("{")
    of jsonObjectEnd: echo("}")
    of jsonArrayStart: echo("[")
    of jsonArrayEnd: echo("]")

  # reached after both the error and the EOF break
  close(x)
|
||||
|
||||
Reference in New Issue
Block a user