From fc0b66a7ff1c17fae66e604279524e757451bd1d Mon Sep 17 00:00:00 2001 From: Andreas Rumpf Date: Sat, 14 Aug 2010 15:46:07 +0200 Subject: [PATCH] accurate file/line information --- doc/lib.txt | 3 + lib/pure/json.nim | 482 +++++++++++++++++++++++++++++++++++ lib/system/excpt.nim | 11 +- noprefix.nim | 64 ----- rod/c2nim/manual.txt | 2 +- rod/ccgstmts.nim | 9 +- rod/cgen.nim | 7 +- tests/testdata/jsontest.json | 6 +- todo.txt | 2 - web/nimrod.ini | 3 +- 10 files changed, 508 insertions(+), 81 deletions(-) create mode 100755 lib/pure/json.nim delete mode 100755 noprefix.nim diff --git a/doc/lib.txt b/doc/lib.txt index abe4ed020d..3fac9fef16 100755 --- a/doc/lib.txt +++ b/doc/lib.txt @@ -156,6 +156,9 @@ Parsers * `parsesql `_ The ``parsesql`` module implements a simple high performance SQL parser. +* `json `_ + High performance JSON parser. + * `lexbase `_ This is a low level module that implements an extremely efficient buffering scheme for lexers and parsers. This is used by the diverse parsing modules. diff --git a/lib/pure/json.nim b/lib/pure/json.nim new file mode 100755 index 0000000000..bc52fb886d --- /dev/null +++ b/lib/pure/json.nim @@ -0,0 +1,482 @@ +# +# +# Nimrod's Runtime Library +# (c) Copyright 2010 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## This module implements a simple high performance `JSON`:idx: +## parser. JSON (JavaScript Object Notation) is a lightweight +## data-interchange format that is easy for humans to read and write +## (unlike XML). It is easy for machines to parse and generate. +## JSON is based on a subset of the JavaScript Programming Language, +## Standard ECMA-262 3rd Edition - December 1999. 
+ +import + hashes, strutils, lexbase, streams, unicode + +type + TJsonEventKind* = enum ## enumeration of all events that may occur when parsing + jsonError, ## an error occurred during parsing + jsonEof, ## end of file reached + jsonString, ## a string literal + jsonNumber, ## a number literal + jsonTrue, ## the value ``true`` + jsonFalse, ## the value ``false`` + jsonNull, ## the value ``null`` + jsonObjectStart, ## start of an object: the ``{`` token + jsonObjectEnd, ## end of an object: the ``}`` token + jsonArrayStart, ## start of an array: the ``[`` token + jsonArrayEnd ## end of an array: the ``]`` token + + TTokKind = enum # must be synchronized with TJsonEventKind! + tkError, + tkEof, + tkString, + tkNumber, + tkTrue, + tkFalse, + tkNull, + tkCurlyLe, + tkCurlyRi, + tkBracketLe, + tkBracketRi, + tkColon, + tkComma + + TJsonError* = enum ## enumeration that lists all errors that can occur + errNone, ## no error + errInvalidToken, ## invalid token + errStringExpected, ## string expected + errColonExpected, ## ``:`` expected + errCommaExpected, ## ``,`` expected + errBracketRiExpected, ## ``]`` expected + errCurlyRiExpected, ## ``}`` expected + errQuoteExpected, ## ``"`` or ``'`` expected + errEOC_Expected, ## ``*/`` expected + errEofExpected, ## EOF expected + errExprExpected ## expr expected + + TParserState = enum + stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma, + stateExpectObjectComma, stateExpectColon, stateExpectValue + + TJsonParser* = object of TBaseLexer ## the parser object. 
+ a: string + kind: TJsonEventKind + err: TJsonError + state: seq[TParserState] + filename: string + +const + errorMessages: array [TJsonError, string] = [ + "no error", + "invalid token", + "string expected", + "':' expected", + "',' expected", + "']' expected", + "'}' expected", + "'\"' or \"'\" expected", + "'*/' expected", + "EOF expected", + "expression expected" + ] + +proc open*(my: var TJsonParser, input: PStream, filename: string) = + ## initializes the parser with an input stream. `Filename` is only used + ## for nice error messages. + lexbase.open(my, input) + my.filename = filename + my.state = @[stateStart] + my.kind = jsonError + my.a = "" + +proc close*(my: var TJsonParser) {.inline.} = + ## closes the parser `my` and its associated input stream. + lexbase.close(my) + +proc str*(my: TJsonParser): string {.inline.} = + ## returns the character data for the events: ``jsonNumber``, + ## ``jsonString`` + assert(my.kind in {jsonNumber, jsonString}) + return my.a + +proc number*(my: TJsonParser): float {.inline.} = + ## returns the number for the event: ``jsonNumber`` + assert(my.kind == jsonNumber) + return parseFloat(my.a) + +proc kind*(my: TJsonParser): TJsonEventKind {.inline.} = + ## returns the current event type for the JSON parser + return my.kind + +proc getColumn*(my: TJsonParser): int {.inline.} = + ## get the current column the parser has arrived at. + result = getColNumber(my, my.bufPos) + +proc getLine*(my: TJsonParser): int {.inline.} = + ## get the current line the parser has arrived at. + result = my.linenumber + +proc getFilename*(my: TJsonParser): string {.inline.} = + ## get the filename of the file that the parser processes. 
+ result = my.filename + +proc errorMsg*(my: TJsonParser): string = + ## returns a helpful error message for the event ``jsonError`` + assert(my.kind == jsonError) + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]] + +proc errorMsgExpected*(my: TJsonParser, e: string): string = + ## returns an error message "`e` expected" in the same format as the + ## other error messages + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), e & " expected"] + +proc handleHexChar(c: Char, x: var int): bool = + result = true # Success + case c + of '0'..'9': x = (x shl 4) or (ord(c) - ord('0')) + of 'a'..'f': x = (x shl 4) or (ord(c) - ord('a') + 10) + of 'A'..'F': x = (x shl 4) or (ord(c) - ord('A') + 10) + else: result = false # error + +proc parseString(my: var TJsonParser): TTokKind = + result = tkString + var pos = my.bufpos + 1 + var buf = my.buf + while true: + case buf[pos] + of '\0': + my.err = errQuoteExpected + result = tkError + break + of '"': + inc(pos) + break + of '\\': + case buf[pos+1] + of '\\', '"', '\'', '/': + add(my.a, buf[pos+1]) + inc(pos, 2) + of 'b': + add(my.a, '\b') + inc(pos, 2) + of 'f': + add(my.a, '\f') + inc(pos, 2) + of 'n': + add(my.a, '\L') + inc(pos, 2) + of 'r': + add(my.a, '\C') + inc(pos, 2) + of 't': + add(my.a, '\t') + inc(pos, 2) + of 'u': + inc(pos, 2) + var r: int + if handleHexChar(buf[pos], r): inc(pos) + if handleHexChar(buf[pos], r): inc(pos) + if handleHexChar(buf[pos], r): inc(pos) + if handleHexChar(buf[pos], r): inc(pos) + add(my.a, toUTF8(TRune(r))) + else: + # don't bother with the error + add(my.a, buf[pos]) + inc(pos) + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + add(my.a, '\c') + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + add(my.a, '\L') + else: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos # store back + +proc skip(my: var TJsonParser) = + var pos = my.bufpos + var buf = my.buf + while true: + case 
buf[pos] + of '/': + if buf[pos+1] == '/': + # skip line comment: + inc(pos, 2) + while true: + case buf[pos] + of '\0': + break + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + break + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + break + else: + inc(pos) + elif buf[pos+1] == '*': + # skip long comment: + inc(pos, 2) + while true: + case buf[pos] + of '\0': + my.err = errEOC_Expected + break + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + of '*': + inc(pos) + if buf[pos] == '/': + inc(pos) + break + else: + inc(pos) + else: + break + of ' ', '\t': + Inc(pos) + of '\c': + pos = lexbase.HandleCR(my, pos) + buf = my.buf + of '\L': + pos = lexbase.HandleLF(my, pos) + buf = my.buf + else: + break + my.bufpos = pos + +proc parseNumber(my: var TJsonParser) = + var pos = my.bufpos + var buf = my.buf + if buf[pos] == '-': + add(my.a, '-') + inc(pos) + if buf[pos] == '.': + add(my.a, "0.") + inc(pos) + else: + while buf[pos] in Digits: + add(my.a, buf[pos]) + inc(pos) + if buf[pos] == '.': + add(my.a, '.') + inc(pos) + # digits after the dot: + while buf[pos] in Digits: + add(my.a, buf[pos]) + inc(pos) + if buf[pos] in {'E', 'e'}: + add(my.a, buf[pos]) + inc(pos) + if buf[pos] in {'+', '-'}: + add(my.a, buf[pos]) + inc(pos) + while buf[pos] in Digits: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + +proc parseName(my: var TJsonParser) = + var pos = my.bufpos + var buf = my.buf + if buf[pos] in IdentStartChars: + while buf[pos] in IdentChars: + add(my.a, buf[pos]) + inc(pos) + my.bufpos = pos + +proc getTok(my: var TJsonParser): TTokKind = + setLen(my.a, 0) + skip(my) # skip whitespace, comments + case my.buf[my.bufpos] + of '-', '.', '0'..'9': + parseNumber(my) + result = tkNumber + of '"': + result = parseString(my) + of '[': + inc(my.bufpos) + result = tkBracketLe + of '{': + inc(my.bufpos) + result = tkCurlyLe + of ']': + inc(my.bufpos) + result = tkBracketRi + of 
'}': + inc(my.bufpos) + result = tkCurlyRi + of ',': + inc(my.bufpos) + result = tkComma + of ':': + inc(my.bufpos) + result = tkColon + of '\0': + result = tkEof + of 'a'..'z', 'A'..'Z', '_': + parseName(my) + case my.a + of "null": result = tkNull + of "true": result = tkTrue + of "false": result = tkFalse + else: result = tkError + else: + inc(my.bufpos) + result = tkError + +proc next*(my: var TJsonParser) = + ## retrieves the first/next event. This controls the parser. + var tk = getTok(my) + var i = my.state.len-1 + case my.state[i] + of stateEof: + if tk == tkEof: + my.kind = jsonEof + else: + my.kind = jsonError + my.err = errEofExpected + of stateStart: + # tokens allowed? + case tk + of tkString, tkNumber, tkTrue, tkFalse, tkNull: + my.state[i] = stateEof # expect EOF next! + my.kind = TJsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateArray) # we expect any + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkEof: + my.kind = jsonEof + else: + my.kind = jsonError + my.err = errEofExpected + of stateObject: + case tk + of tkString, tkNumber, tkTrue, tkFalse, tkNull: + my.state.add(stateExpectColon) + my.kind = TJsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateExpectColon) + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateExpectColon) + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkCurlyRi: + my.kind = jsonObjectEnd + discard my.state.pop() + else: + my.kind = jsonError + my.err = errCurlyRiExpected + of stateArray: + case tk + of tkString, tkNumber, tkTrue, tkFalse, tkNull: + my.state.add(stateExpectArrayComma) # expect value next! 
+ my.kind = TJsonEventKind(ord(tk)) + of tkBracketLe: + my.state.add(stateExpectArrayComma) + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state.add(stateExpectArrayComma) + my.state.add(stateObject) + my.kind = jsonObjectStart + of tkBracketRi: + my.kind = jsonArrayEnd + discard my.state.pop() + else: + my.kind = jsonError + my.err = errBracketRiExpected + of stateExpectArrayComma: + case tk + of tkComma: + discard my.state.pop() + next(my) + of tkBracketRi: + my.kind = jsonArrayEnd + discard my.state.pop() # pop stateExpectArrayComma + discard my.state.pop() # pop stateArray + else: + my.kind = jsonError + my.err = errBracketRiExpected + of stateExpectObjectComma: + case tk + of tkComma: + discard my.state.pop() + next(my) + of tkCurlyRi: + my.kind = jsonObjectEnd + discard my.state.pop() # pop stateExpectObjectComma + discard my.state.pop() # pop stateObject + else: + my.kind = jsonError + my.err = errCurlyRiExpected + of stateExpectColon: + case tk + of tkColon: + my.state[i] = stateExpectValue + next(my) + else: + my.kind = jsonError + my.err = errColonExpected + of stateExpectValue: + case tk + of tkString, tkNumber, tkTrue, tkFalse, tkNull: + my.state[i] = stateExpectObjectComma + my.kind = TJsonEventKind(ord(tk)) + of tkBracketLe: + my.state[i] = stateExpectObjectComma + my.state.add(stateArray) + my.kind = jsonArrayStart + of tkCurlyLe: + my.state[i] = stateExpectObjectComma + my.state.add(stateObject) + my.kind = jsonObjectStart + else: + my.kind = jsonError + my.err = errExprExpected + +when isMainModule: + import os + var s = newFileStream(ParamStr(1), fmRead) + if s == nil: quit("cannot open the file" & ParamStr(1)) + var x: TJsonParser + open(x, s, ParamStr(1)) + while true: + next(x) + case x.kind + of jsonError: + Echo(x.errorMsg()) + break + of jsonEof: break + of jsonString, jsonNumber: echo(x.str) + of jsonTrue: Echo("!TRUE") + of jsonFalse: Echo("!FALSE") + of jsonNull: Echo("!NULL") + of jsonObjectStart: Echo("{") + 
of jsonObjectEnd: Echo("}") + of jsonArrayStart: Echo("[") + of jsonArrayEnd: Echo("]") + + close(x) + diff --git a/lib/system/excpt.nim b/lib/system/excpt.nim index 673e5a50eb..d8bdf2a9ff 100755 --- a/lib/system/excpt.nim +++ b/lib/system/excpt.nim @@ -68,7 +68,8 @@ var tempFrames: array [0..127, PFrame] # cannot be allocated on the stack! - stackTraceNewLine* = "\n" ## undocumented feature + stackTraceNewLine* = "\n" ## undocumented feature; it is replaced by ``
`` + ## for CGI applications proc auxWriteStackTrace(f: PFrame, s: var string) = const @@ -102,10 +103,14 @@ proc auxWriteStackTrace(f: PFrame, s: var string) = add(s, $(total-i-1)) add(s, " calls omitted) ...") else: - add(s, $tempFrames[j].procname) + var oldLen = s.len + add(s, tempFrames[j].filename) if tempFrames[j].line > 0: - add(s, ", line: ") + add(s, '(') add(s, $tempFrames[j].line) + add(s, ')') + for k in 1..max(1, 25-(s.len-oldLen)): add(s, ' ') + add(s, tempFrames[j].procname) add(s, stackTraceNewLine) proc rawWriteStackTrace(s: var string) = diff --git a/noprefix.nim b/noprefix.nim deleted file mode 100755 index e7ec26c560..0000000000 --- a/noprefix.nim +++ /dev/null @@ -1,64 +0,0 @@ -# strip those silly GTK/ATK prefixes... - -import - expandimportc, os - -const - filelist = [ - ("sdl/sdl", "sdl"), - ("sdl/sdl_net", "sdl"), - ("sdl/sdl_gfx", "sdl"), - ("sdl/sdl_image", "sdl"), - ("sdl/sdl_mixer_nosmpeg", "sdl"), - ("sdl/sdl_mixer", "sdl"), - ("sdl/sdl_ttf", "sdl"), - ("sdl/smpeg", "sdl"), - - ("libcurl", "curl"), - ("mysql", "mysql"), - ("postgres", ""), - ("sqlite3", "sqlite3"), - ("tcl", "tcl"), - ("cairo/cairo", "cairo"), - ("cairo/cairoft", "cairo"), - ("cairo/cairowin32", "cairo"), - ("cairo/cairoxlib", "cairo"), - - ("gtk/atk", "atk"), - ("gtk/gdk2", "gdk"), - ("gtk/gdk2pixbuf", "gdk"), - ("gtk/gdkglext", "gdk"), - ("gtk/glib2", ""), - ("gtk/gtk2", "gtk"), - ("gtk/gtkglext", "gtk"), - ("gtk/gtkhtml", "gtk"), - ("gtk/libglade2", "glade"), - ("gtk/pango", "pango"), - ("gtk/pangoutils", "pango"), - - ("lua/lua", "lua"), - ("lua/lauxlib", "luaL"), - ("lua/lualib", "lua"), - - ("opengl/gl", ""), - ("opengl/glext", ""), - ("opengl/wingl", ""), - ("opengl/glu", ""), - ("opengl/glut", ""), - ("opengl/glx", ""), - - ("pcre/pcre", "pcre") - ] - -proc createDirs = - createDir("lib/newwrap/sdl") - createDir("lib/newwrap/cairo") - createDir("lib/newwrap/gtk") - createDir("lib/newwrap/lua") - createDir("lib/newwrap/opengl") - createDir("lib/newwrap/pcre") - 
-for filename, prefix in items(filelist): - var f = addFileExt(filename, "nim") - main("lib/wrappers" / f, "lib/newwrap" / f, prefix) - diff --git a/rod/c2nim/manual.txt b/rod/c2nim/manual.txt index 7aa8b557b6..c485a57f1f 100755 --- a/rod/c2nim/manual.txt +++ b/rod/c2nim/manual.txt @@ -211,7 +211,7 @@ identifier should be converted: .. code-block:: C #mangle "'GTK_'{.*}" "TGtk$1" -For convenience the PEG pattern and the replacement can be a single identifiers +For convenience the PEG pattern and the replacement can be single identifiers too, there is no need to quote them: .. code-block:: C diff --git a/rod/ccgstmts.nim b/rod/ccgstmts.nim index 348615cce6..f07dfb7e4c 100755 --- a/rod/ccgstmts.nim +++ b/rod/ccgstmts.nim @@ -24,10 +24,8 @@ proc genLineDir(p: BProc, t: PNode) = elif ({optLineTrace, optStackTrace} * p.Options == {optLineTrace, optStackTrace}) and ((p.prc == nil) or not (sfPure in p.prc.flags)): - inc(p.labels) - appff(p.s[cpsStmts], "F.line = $1;$n", - "%LOC$2 = getelementptr %TF %F, %NI 2$n" & - "store %NI $1, %NI* %LOC$2$n", [toRope(line), toRope(p.labels)]) + appf(p.s[cpsStmts], "F.line = $1;F.filename = $2;$n", + [toRope(line), makeCString(toFilename(t.info).extractFilename)]) proc finishTryStmt(p: BProc, howMany: int) = for i in countup(1, howMany): @@ -265,14 +263,15 @@ proc getRaiseFrmt(p: BProc): string = result = "#raiseException((#E_Base*)$1, $2);$n" proc genRaiseStmt(p: BProc, t: PNode) = - genLineDir(p, t) if t.sons[0] != nil: var a: TLoc InitLocExpr(p, t.sons[0], a) var e = rdLoc(a) var typ = skipTypes(t.sons[0].typ, abstractPtrs) + genLineDir(p, t) appcg(p, cpsStmts, getRaiseFrmt(p), [e, makeCString(typ.sym.name.s)]) else: + genLineDir(p, t) # reraise the last exception: if gCmd == cmdCompileToCpp: appcg(p, cpsStmts, "throw;" & tnl) diff --git a/rod/cgen.nim b/rod/cgen.nim index 562b660eba..691fb29fb1 100755 --- a/rod/cgen.nim +++ b/rod/cgen.nim @@ -563,8 +563,7 @@ proc genProcAux(m: BModule, prc: PSym) = if optStackTrace in prc.options: 
getFrameDecl(p) app(generatedProc, p.s[cpsLocals]) - procname = CStringLit(p, generatedProc, - prc.owner.name.s & '.' & prc.name.s) + procname = CStringLit(p, generatedProc, prc.name.s) filename = CStringLit(p, generatedProc, toFilename(prc.info)) app(generatedProc, initFrame(p, procname, filename)) else: @@ -577,7 +576,7 @@ proc genProcAux(m: BModule, prc: PSym) = if prc.loc.a < 0: appf(m.s[cfsDebugInit], "profileData[$1].procname = $2;$n", [ toRope(gProcProfile), - makeCString(prc.owner.name.s & '.' & prc.name.s)]) + makeCString(prc.name.s)]) prc.loc.a = gProcProfile inc(gProcProfile) prepend(p.s[cpsInit], toRope("NIM_profilingStart = getticks();" & tnl)) @@ -782,7 +781,7 @@ proc genInitCode(m: BModule) = getFrameDecl(m.initProc) app(prc, m.initProc.s[cpsLocals]) app(prc, m.s[cfsTypeInit1]) - procname = CStringLit(m.initProc, prc, "module " & m.module.name.s) + procname = CStringLit(m.initProc, prc, m.module.name.s) filename = CStringLit(m.initProc, prc, toFilename(m.module.info)) app(prc, initFrame(m.initProc, procname, filename)) else: diff --git a/tests/testdata/jsontest.json b/tests/testdata/jsontest.json index 27b5ba1d1e..d5555456aa 100755 --- a/tests/testdata/jsontest.json +++ b/tests/testdata/jsontest.json @@ -5,12 +5,15 @@ { "key1": null, + {1: 2, {}: 4}: 12, + "key2": [ {}, { }, [], - [ /* empty array */ ], + [ // empty array + ], -1e10 // another comment @@ -19,4 +22,5 @@ "key3": false } +// [{}, {899: 12, "x": "y"}, [], 123, 89, 89, "xyz", null, [], [], [1, 2, 3]] diff --git a/todo.txt b/todo.txt index e0036e1710..6536a6e19a 100755 --- a/todo.txt +++ b/todo.txt @@ -1,7 +1,6 @@ For version 0.8.10 ================== -- accurate file/line information - exception propagation across DLLs - fix exception handling - fix implicit generic routines @@ -74,7 +73,6 @@ Library - bignums - ftp, smtp (and other internet protocols) -- finish json module: use coroutines for this! 
- pdcurses bindings - queues additional to streams: have two positions (read/write) instead of one diff --git a/web/nimrod.ini b/web/nimrod.ini index efdd5fa967..2eb8ed66d6 100755 --- a/web/nimrod.ini +++ b/web/nimrod.ini @@ -33,7 +33,8 @@ srcdoc: "impure/zipfiles;pure/xmlgen;pure/macros;pure/parseutils;pure/browsers" srcdoc: "impure/db_postgres;impure/db_mysql;impure/db_sqlite" srcdoc: "pure/httpserver;pure/httpclient" srcdoc: "pure/ropes;pure/unidecode/unidecode;pure/xmldom;pure/xmldomparser" -srcdoc: "pure/xmlparser;pure/htmlparser;pure/xmltree;pure/colors;impure/graphics" +srcdoc: "pure/xmlparser;pure/htmlparser;pure/xmltree;pure/colors" +srcdoc: "pure/json;impure/graphics" webdoc: "wrappers/libcurl;pure/md5;wrappers/mysql;wrappers/iup" webdoc: "wrappers/sqlite3;wrappers/postgres;wrappers/tinyc"