accurate file/line information

This commit is contained in:
Andreas Rumpf
2010-08-14 15:46:07 +02:00
parent fee8e328ef
commit fc0b66a7ff
10 changed files with 508 additions and 81 deletions

View File

@@ -156,6 +156,9 @@ Parsers
* `parsesql <parsesql.html>`_
The ``parsesql`` module implements a simple high performance SQL parser.
* `json <json.html>`_
High performance JSON parser.
* `lexbase <lexbase.html>`_
This is a low level module that implements an extremely efficient buffering
scheme for lexers and parsers. This is used by the diverse parsing modules.

482
lib/pure/json.nim Executable file
View File

@@ -0,0 +1,482 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2010 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements a simple high performance `JSON`:idx:
## parser. JSON (JavaScript Object Notation) is a lightweight
## data-interchange format that is easy for humans to read and write
## (unlike XML). It is easy for machines to parse and generate.
## JSON is based on a subset of the JavaScript Programming Language,
## Standard ECMA-262 3rd Edition - December 1999.
import
hashes, strutils, lexbase, streams, unicode
type
TJsonEventKind* = enum ## enumeration of all events that may occur when parsing
jsonError, ## an error occurred during parsing
jsonEof, ## end of file reached
jsonString, ## a string literal
jsonNumber, ## a number literal
jsonTrue, ## the value ``true``
jsonFalse, ## the value ``false``
jsonNull, ## the value ``null``
jsonObjectStart, ## start of an object: the ``{`` token
jsonObjectEnd, ## end of an object: the ``}`` token
jsonArrayStart, ## start of an array: the ``[`` token
jsonArrayEnd ## end of an array: the ``]`` token
# Token kinds produced by the lexer (``getTok``). ``next`` converts the value
# tokens with ``TJsonEventKind(ord(tk))``, so the leading members have to keep
# the same order as ``TJsonEventKind``.
TTokKind = enum # must be synchronized with TJsonEventKind!
tkError,
tkEof,
tkString,
tkNumber,
tkTrue,
tkFalse,
tkNull,
tkCurlyLe,
tkCurlyRi,
tkBracketLe,
tkBracketRi,
tkColon,
tkComma
TJsonError* = enum ## enumeration that lists all errors that can occur
errNone, ## no error
errInvalidToken, ## invalid token
errStringExpected, ## string expected
errColonExpected, ## ``:`` expected
errCommaExpected, ## ``,`` expected
errBracketRiExpected, ## ``]`` expected
errCurlyRiExpected, ## ``}`` expected
errQuoteExpected, ## ``"`` or ``'`` expected
errEOC_Expected, ## ``*/`` expected
errEofExpected, ## EOF expected
errExprExpected ## expr expected
# States kept on the parser's state stack; ``next`` dispatches on the topmost
# entry.
TParserState = enum
stateEof, stateStart, stateObject, stateArray, stateExpectArrayComma,
stateExpectObjectComma, stateExpectColon, stateExpectValue
TJsonParser* = object of TBaseLexer ## the parser object.
a: string # text of the current token; cleared by ``getTok`` for each token
kind: TJsonEventKind # current event, set by ``next``
err: TJsonError # error code; read by ``errorMsg`` for ``jsonError`` events
state: seq[TParserState] # stack of parser states, see ``next``
filename: string # only used for error messages
const
# Message text for every ``TJsonError``. The array is indexed by the error
# code, so the entries must stay in the same order as the enum members.
errorMessages: array [TJsonError, string] = [
"no error",
"invalid token",
"string expected",
"':' expected",
"',' expected",
"']' expected",
"'}' expected",
"'\"' or \"'\" expected",
"'*/' expected",
"EOF expected",
"expression expected"
]
proc open*(my: var TJsonParser, input: PStream, filename: string) =
  ## initializes the parser with an input stream. `Filename` is only used
  ## for nice error messages.
  lexbase.open(my, input)
  my.filename = filename
  my.state = @[stateStart]
  my.kind = jsonError
  # ``kind`` starts out as ``jsonError``, so ``errorMsg`` may legally be
  # called before the first ``next``. Reset the error code as well, otherwise
  # a parser object that is opened a second time would report the error of
  # its previous run.
  my.err = errNone
  my.a = ""
proc close*(my: var TJsonParser) {.inline.} =
  ## shuts the parser `my` down; the underlying lexer closes the input
  ## stream that was handed to ``open``.
  # the module qualifier is required: an unqualified ``close(my)`` would
  # refer to this very proc
  lexbase.close(my)
proc str*(my: TJsonParser): string {.inline.} =
  ## yields the text of the current token. Only valid while the current
  ## event is ``jsonNumber`` or ``jsonString``.
  assert(my.kind == jsonNumber or my.kind == jsonString)
  result = my.a
proc number*(my: TJsonParser): float {.inline.} =
  ## converts the text of the current ``jsonNumber`` event to a float.
  assert(my.kind == jsonNumber)
  result = parseFloat(my.a)
proc kind*(my: TJsonParser): TJsonEventKind {.inline.} =
  ## the kind of event the JSON parser is currently positioned at
  result = my.kind
proc getColumn*(my: TJsonParser): int {.inline.} =
  ## column of the parser's current buffer position.
  return getColNumber(my, my.bufPos)
proc getLine*(my: TJsonParser): int {.inline.} =
  ## line number the parser has reached so far.
  return my.linenumber
proc getFilename*(my: TJsonParser): string {.inline.} =
  ## the filename that was passed to ``open``.
  return my.filename
proc errorMsg*(my: TJsonParser): string =
  ## formats the current ``jsonError`` event as
  ## ``filename(line, column) Error: message``.
  assert(my.kind == jsonError)
  var line = $getLine(my)
  var column = $getColumn(my)
  var msg = errorMessages[my.err]
  result = "$1($2, $3) Error: $4" % [my.filename, line, column, msg]
proc errorMsgExpected*(my: TJsonParser, e: string): string =
  ## builds the message "`e` expected" for the current position, using the
  ## same ``filename(line, column) Error: ...`` layout as ``errorMsg``.
  var what = e & " expected"
  var line = $getLine(my)
  var column = $getColumn(my)
  result = "$1($2, $3) Error: $4" % [my.filename, line, column, what]
proc handleHexChar(c: Char, x: var int): bool =
  ## Shifts the hexadecimal digit `c` into `x` (``x = x*16 + digit``) and
  ## returns true. If `c` is not a hex digit, `x` is left untouched and the
  ## result is false.
  var digit: int
  if c in {'0'..'9'}: digit = ord(c) - ord('0')
  elif c in {'a'..'f'}: digit = ord(c) - ord('a') + 10
  elif c in {'A'..'F'}: digit = ord(c) - ord('A') + 10
  else: return false
  x = (x shl 4) or digit
  result = true
proc parseString(my: var TJsonParser): TTokKind =
  ## Lexes the string literal whose opening quote is at ``my.bufpos`` and
  ## appends its decoded content to ``my.a``.
  ##
  ## Returns ``tkString`` on success. If the end of input (``'\0'``) is hit
  ## before the closing quote, ``my.err`` is set to ``errQuoteExpected`` and
  ## ``tkError`` is returned. Malformed escapes are tolerated and not
  ## reported.
  result = tkString
  var pos = my.bufpos + 1 # skip the opening quote
  var buf = my.buf
  while true:
    case buf[pos]
    of '\0':
      my.err = errQuoteExpected
      result = tkError
      break
    of '"':
      inc(pos) # consume the closing quote
      break
    of '\\':
      case buf[pos+1]
      of '\\', '"', '\'', '/':
        add(my.a, buf[pos+1])
        inc(pos, 2)
      of 'b':
        add(my.a, '\b')
        inc(pos, 2)
      of 'f':
        add(my.a, '\f')
        inc(pos, 2)
      of 'n':
        add(my.a, '\L')
        inc(pos, 2)
      of 'r':
        add(my.a, '\C')
        inc(pos, 2)
      of 't':
        add(my.a, '\t')
        inc(pos, 2)
      of 'u':
        inc(pos, 2)
        # read up to four hex digits; fewer digits are silently accepted
        var r = 0
        var n = 0
        while n < 4 and handleHexChar(buf[pos], r):
          inc(pos)
          inc(n)
        # JSON writes characters outside the Basic Multilingual Plane as a
        # UTF-16 surrogate pair (``\uD8xx\uDCxx``). Merge such a pair into a
        # single code point; encoding the halves separately would produce
        # invalid UTF-8.
        if r >= 0xD800 and r <= 0xDBFF and buf[pos] == '\\' and
            buf[pos+1] == 'u':
          var p = pos + 2
          var lo = 0
          n = 0
          while n < 4 and handleHexChar(buf[p], lo):
            inc(p)
            inc(n)
          if n == 4 and lo >= 0xDC00 and lo <= 0xDFFF:
            r = 0x10000 + (((r - 0xD800) shl 10) or (lo - 0xDC00))
            pos = p
          # otherwise `pos` is left alone: the following escape is lexed on
          # its own, exactly as before
        add(my.a, toUTF8(TRune(r)))
      else:
        # don't bother with the error: keep the backslash and let the main
        # loop deal with the character after it
        add(my.a, buf[pos])
        inc(pos)
    of '\c':
      pos = lexbase.HandleCR(my, pos)
      buf = my.buf # re-fetch the buffer after the lexer handled the newline
      add(my.a, '\c')
    of '\L':
      pos = lexbase.HandleLF(my, pos)
      buf = my.buf # re-fetch the buffer after the lexer handled the newline
      add(my.a, '\L')
    else:
      add(my.a, buf[pos])
      inc(pos)
  my.bufpos = pos # store back
proc skip(my: var TJsonParser) =
# Advances ``my.bufpos`` past blanks, tabs, line ends, ``//`` line comments
# and ``/* */`` long comments, stopping at the first character that starts
# none of these.
var pos = my.bufpos
var buf = my.buf
while true:
case buf[pos]
of '/':
if buf[pos+1] == '/':
# skip line comment:
inc(pos, 2)
while true:
case buf[pos]
of '\0':
break
of '\c':
pos = lexbase.HandleCR(my, pos)
# the buffer is re-fetched after every HandleCR/HandleLF call
buf = my.buf
break
of '\L':
pos = lexbase.HandleLF(my, pos)
buf = my.buf
break
else:
inc(pos)
elif buf[pos+1] == '*':
# skip long comment:
inc(pos, 2)
while true:
case buf[pos]
of '\0':
# NOTE(review): the error code is recorded here, but ``skip`` returns
# nothing and ``getTok`` maps the ``'\0'`` that follows to ``tkEof``, so an
# unterminated comment does not appear to surface as a ``jsonError`` event
my.err = errEOC_Expected
break
of '\c':
pos = lexbase.HandleCR(my, pos)
buf = my.buf
of '\L':
pos = lexbase.HandleLF(my, pos)
buf = my.buf
of '*':
inc(pos)
if buf[pos] == '/':
inc(pos)
break
else:
inc(pos)
else:
# a lone '/' is not a comment; leave it for the caller
break
of ' ', '\t':
Inc(pos)
of '\c':
pos = lexbase.HandleCR(my, pos)
buf = my.buf
of '\L':
pos = lexbase.HandleLF(my, pos)
buf = my.buf
else:
break
my.bufpos = pos
proc parseNumber(my: var TJsonParser) =
  ## Copies a number literal starting at ``my.bufpos`` into ``my.a``:
  ## optional ``-``, integer digits, optional fraction, optional exponent.
  ## A literal that starts with ``.`` is stored with a leading ``0``. No
  ## validation is done; whatever matches is consumed.
  var i = my.bufpos
  var src = my.buf
  # optional sign
  if src[i] == '-':
    my.a.add('-')
    inc(i)
  if src[i] != '.':
    # integer part, optionally followed by the decimal point
    while src[i] in Digits:
      my.a.add(src[i])
      inc(i)
    if src[i] == '.':
      my.a.add('.')
      inc(i)
  else:
    # leading dot: put an explicit zero in front of it
    my.a.add("0.")
    inc(i)
  # digits after the dot:
  while src[i] in Digits:
    my.a.add(src[i])
    inc(i)
  # exponent
  if src[i] == 'E' or src[i] == 'e':
    my.a.add(src[i])
    inc(i)
    if src[i] == '+' or src[i] == '-':
      my.a.add(src[i])
      inc(i)
    while src[i] in Digits:
      my.a.add(src[i])
      inc(i)
  my.bufpos = i
proc parseName(my: var TJsonParser) =
  ## Appends the identifier starting at ``my.bufpos`` to ``my.a`` and moves
  ## ``my.bufpos`` behind it. Does nothing if the current character cannot
  ## start an identifier.
  var i = my.bufpos
  var src = my.buf
  if not (src[i] in IdentStartChars): return
  while src[i] in IdentChars:
    my.a.add(src[i])
    inc(i)
  my.bufpos = i
proc getTok(my: var TJsonParser): TTokKind =
  ## Lexes the next token. ``my.a`` is cleared first and afterwards holds
  ## the text of a string, number or name token. Unknown names and unknown
  ## characters are consumed and yield ``tkError``.
  setLen(my.a, 0)
  skip(my) # skip whitespace, comments
  var c = my.buf[my.bufpos]
  if c == '"':
    result = parseString(my)
  elif c == '\0':
    # end of input; the position is not advanced
    result = tkEof
  elif c in {'-', '.', '0'..'9'}:
    parseNumber(my)
    result = tkNumber
  elif c in {'a'..'z', 'A'..'Z', '_'}:
    parseName(my)
    if my.a == "null": result = tkNull
    elif my.a == "true": result = tkTrue
    elif my.a == "false": result = tkFalse
    else: result = tkError
  else:
    # single character tokens; anything unknown is consumed as an error
    inc(my.bufpos)
    case c
    of '[': result = tkBracketLe
    of '{': result = tkCurlyLe
    of ']': result = tkBracketRi
    of '}': result = tkCurlyRi
    of ',': result = tkComma
    of ':': result = tkColon
    else: result = tkError
proc next*(my: var TJsonParser) =
## retrieves the first/next event. This controls the parser.
##
## One token is read and interpreted according to the state on top of the
## state stack. The resulting event is stored in ``my.kind``; for a
## ``jsonError`` event ``my.err`` holds the error code. Commas and colons
## produce no event of their own: the proc calls itself to deliver the
## event that follows them.
var tk = getTok(my)
var i = my.state.len-1
# Tokens that a state does not list (including ``tkError``) end up in that
# state's ``else`` branch, which sets ``my.err`` to a state specific code.
case my.state[i]
of stateEof:
# a scalar document was read; only the end of input may follow
if tk == tkEof:
my.kind = jsonEof
else:
my.kind = jsonError
my.err = errEofExpected
of stateStart:
# tokens allowed?
case tk
of tkString, tkNumber, tkTrue, tkFalse, tkNull:
my.state[i] = stateEof # expect EOF next!
# works because TTokKind mirrors TJsonEventKind for the value tokens
my.kind = TJsonEventKind(ord(tk))
of tkBracketLe:
# stateStart stays below the pushed state, so it is the top again once the
# array (or object) has been closed
my.state.add(stateArray) # we expect any
my.kind = jsonArrayStart
of tkCurlyLe:
my.state.add(stateObject)
my.kind = jsonObjectStart
of tkEof:
my.kind = jsonEof
else:
my.kind = jsonError
my.err = errEofExpected
of stateObject:
# at the position of a key, or of the closing '}'. Any value, not only a
# string, is accepted as the key.
case tk
of tkString, tkNumber, tkTrue, tkFalse, tkNull:
my.state.add(stateExpectColon)
my.kind = TJsonEventKind(ord(tk))
of tkBracketLe:
# stateExpectColon is pushed first, so it is on top when the nested
# array/object has been popped again
my.state.add(stateExpectColon)
my.state.add(stateArray)
my.kind = jsonArrayStart
of tkCurlyLe:
my.state.add(stateExpectColon)
my.state.add(stateObject)
my.kind = jsonObjectStart
of tkCurlyRi:
my.kind = jsonObjectEnd
discard my.state.pop()
else:
my.kind = jsonError
my.err = errCurlyRiExpected
of stateArray:
# at the position of an element, or of the closing ']'
case tk
of tkString, tkNumber, tkTrue, tkFalse, tkNull:
my.state.add(stateExpectArrayComma) # expect value next!
my.kind = TJsonEventKind(ord(tk))
of tkBracketLe:
my.state.add(stateExpectArrayComma)
my.state.add(stateArray)
my.kind = jsonArrayStart
of tkCurlyLe:
my.state.add(stateExpectArrayComma)
my.state.add(stateObject)
my.kind = jsonObjectStart
of tkBracketRi:
my.kind = jsonArrayEnd
discard my.state.pop()
else:
my.kind = jsonError
my.err = errBracketRiExpected
of stateExpectArrayComma:
# after an array element
case tk
of tkComma:
# back to stateArray; the recursive call delivers the next element. As
# stateArray also accepts ']', a comma directly before ']' is tolerated.
discard my.state.pop()
next(my)
of tkBracketRi:
my.kind = jsonArrayEnd
discard my.state.pop() # pop stateExpectArrayComma
discard my.state.pop() # pop stateArray
else:
my.kind = jsonError
my.err = errBracketRiExpected
of stateExpectObjectComma:
# after the value of a key/value pair
case tk
of tkComma:
# back to stateObject; the recursive call delivers the next key
discard my.state.pop()
next(my)
of tkCurlyRi:
my.kind = jsonObjectEnd
discard my.state.pop() # pop stateExpectObjectComma
discard my.state.pop() # pop stateObject
else:
my.kind = jsonError
my.err = errCurlyRiExpected
of stateExpectColon:
# after a key
case tk
of tkColon:
# the colon itself is no event: switch state and deliver the value
my.state[i] = stateExpectValue
next(my)
else:
my.kind = jsonError
my.err = errColonExpected
of stateExpectValue:
# after the colon; the top state is replaced, not pushed, so that closing
# the object later needs only two pops
case tk
of tkString, tkNumber, tkTrue, tkFalse, tkNull:
my.state[i] = stateExpectObjectComma
my.kind = TJsonEventKind(ord(tk))
of tkBracketLe:
my.state[i] = stateExpectObjectComma
my.state.add(stateArray)
my.kind = jsonArrayStart
of tkCurlyLe:
my.state[i] = stateExpectObjectComma
my.state.add(stateObject)
my.kind = jsonObjectStart
else:
my.kind = jsonError
my.err = errExprExpected
when isMainModule:
  # Test driver: parses the file given as first command line argument and
  # echoes every event the parser produces.
  import os
  # ParamStr(1) is only valid if an argument was passed
  if ParamCount() < 1: quit("usage: json <filename>")
  var s = newFileStream(ParamStr(1), fmRead)
  # the separating space was missing, the name was glued to the message
  if s == nil: quit("cannot open the file " & ParamStr(1))
  var x: TJsonParser
  open(x, s, ParamStr(1))
  while true:
    next(x)
    case x.kind
    of jsonError:
      Echo(x.errorMsg())
      break
    of jsonEof: break
    of jsonString, jsonNumber: echo(x.str)
    of jsonTrue: Echo("!TRUE")
    of jsonFalse: Echo("!FALSE")
    of jsonNull: Echo("!NULL")
    of jsonObjectStart: Echo("{")
    of jsonObjectEnd: Echo("}")
    of jsonArrayStart: Echo("[")
    of jsonArrayEnd: Echo("]")
  # closes the parser together with its input stream
  close(x)

View File

@@ -68,7 +68,8 @@ var
tempFrames: array [0..127, PFrame] # cannot be allocated on the stack!
stackTraceNewLine* = "\n" ## undocumented feature
stackTraceNewLine* = "\n" ## undocumented feature; it is replaced by ``<br>``
## for CGI applications
proc auxWriteStackTrace(f: PFrame, s: var string) =
const
@@ -102,10 +103,14 @@ proc auxWriteStackTrace(f: PFrame, s: var string) =
add(s, $(total-i-1))
add(s, " calls omitted) ...")
else:
add(s, $tempFrames[j].procname)
var oldLen = s.len
add(s, tempFrames[j].filename)
if tempFrames[j].line > 0:
add(s, ", line: ")
add(s, '(')
add(s, $tempFrames[j].line)
add(s, ')')
for k in 1..max(1, 25-(s.len-oldLen)): add(s, ' ')
add(s, tempFrames[j].procname)
add(s, stackTraceNewLine)
proc rawWriteStackTrace(s: var string) =

View File

@@ -1,64 +0,0 @@
# strip those silly GTK/ATK prefixes...
import
expandimportc, os
const
filelist = [
("sdl/sdl", "sdl"),
("sdl/sdl_net", "sdl"),
("sdl/sdl_gfx", "sdl"),
("sdl/sdl_image", "sdl"),
("sdl/sdl_mixer_nosmpeg", "sdl"),
("sdl/sdl_mixer", "sdl"),
("sdl/sdl_ttf", "sdl"),
("sdl/smpeg", "sdl"),
("libcurl", "curl"),
("mysql", "mysql"),
("postgres", ""),
("sqlite3", "sqlite3"),
("tcl", "tcl"),
("cairo/cairo", "cairo"),
("cairo/cairoft", "cairo"),
("cairo/cairowin32", "cairo"),
("cairo/cairoxlib", "cairo"),
("gtk/atk", "atk"),
("gtk/gdk2", "gdk"),
("gtk/gdk2pixbuf", "gdk"),
("gtk/gdkglext", "gdk"),
("gtk/glib2", ""),
("gtk/gtk2", "gtk"),
("gtk/gtkglext", "gtk"),
("gtk/gtkhtml", "gtk"),
("gtk/libglade2", "glade"),
("gtk/pango", "pango"),
("gtk/pangoutils", "pango"),
("lua/lua", "lua"),
("lua/lauxlib", "luaL"),
("lua/lualib", "lua"),
("opengl/gl", ""),
("opengl/glext", ""),
("opengl/wingl", ""),
("opengl/glu", ""),
("opengl/glut", ""),
("opengl/glx", ""),
("pcre/pcre", "pcre")
]
proc createDirs =
createDir("lib/newwrap/sdl")
createDir("lib/newwrap/cairo")
createDir("lib/newwrap/gtk")
createDir("lib/newwrap/lua")
createDir("lib/newwrap/opengl")
createDir("lib/newwrap/pcre")
for filename, prefix in items(filelist):
var f = addFileExt(filename, "nim")
main("lib/wrappers" / f, "lib/newwrap" / f, prefix)

View File

@@ -211,7 +211,7 @@ identifier should be converted:
.. code-block:: C
#mangle "'GTK_'{.*}" "TGtk$1"
For convenience the PEG pattern and the replacement can be a single identifiers
For convenience the PEG pattern and the replacement can be single identifiers
too, there is no need to quote them:
.. code-block:: C

View File

@@ -24,10 +24,8 @@ proc genLineDir(p: BProc, t: PNode) =
elif ({optLineTrace, optStackTrace} * p.Options ==
{optLineTrace, optStackTrace}) and
((p.prc == nil) or not (sfPure in p.prc.flags)):
inc(p.labels)
appff(p.s[cpsStmts], "F.line = $1;$n",
"%LOC$2 = getelementptr %TF %F, %NI 2$n" &
"store %NI $1, %NI* %LOC$2$n", [toRope(line), toRope(p.labels)])
appf(p.s[cpsStmts], "F.line = $1;F.filename = $2;$n",
[toRope(line), makeCString(toFilename(t.info).extractFilename)])
proc finishTryStmt(p: BProc, howMany: int) =
for i in countup(1, howMany):
@@ -265,14 +263,15 @@ proc getRaiseFrmt(p: BProc): string =
result = "#raiseException((#E_Base*)$1, $2);$n"
proc genRaiseStmt(p: BProc, t: PNode) =
genLineDir(p, t)
if t.sons[0] != nil:
var a: TLoc
InitLocExpr(p, t.sons[0], a)
var e = rdLoc(a)
var typ = skipTypes(t.sons[0].typ, abstractPtrs)
genLineDir(p, t)
appcg(p, cpsStmts, getRaiseFrmt(p), [e, makeCString(typ.sym.name.s)])
else:
genLineDir(p, t)
# reraise the last exception:
if gCmd == cmdCompileToCpp:
appcg(p, cpsStmts, "throw;" & tnl)

View File

@@ -563,8 +563,7 @@ proc genProcAux(m: BModule, prc: PSym) =
if optStackTrace in prc.options:
getFrameDecl(p)
app(generatedProc, p.s[cpsLocals])
procname = CStringLit(p, generatedProc,
prc.owner.name.s & '.' & prc.name.s)
procname = CStringLit(p, generatedProc, prc.name.s)
filename = CStringLit(p, generatedProc, toFilename(prc.info))
app(generatedProc, initFrame(p, procname, filename))
else:
@@ -577,7 +576,7 @@ proc genProcAux(m: BModule, prc: PSym) =
if prc.loc.a < 0:
appf(m.s[cfsDebugInit], "profileData[$1].procname = $2;$n", [
toRope(gProcProfile),
makeCString(prc.owner.name.s & '.' & prc.name.s)])
makeCString(prc.name.s)])
prc.loc.a = gProcProfile
inc(gProcProfile)
prepend(p.s[cpsInit], toRope("NIM_profilingStart = getticks();" & tnl))
@@ -782,7 +781,7 @@ proc genInitCode(m: BModule) =
getFrameDecl(m.initProc)
app(prc, m.initProc.s[cpsLocals])
app(prc, m.s[cfsTypeInit1])
procname = CStringLit(m.initProc, prc, "module " & m.module.name.s)
procname = CStringLit(m.initProc, prc, m.module.name.s)
filename = CStringLit(m.initProc, prc, toFilename(m.module.info))
app(prc, initFrame(m.initProc, procname, filename))
else:

View File

@@ -5,12 +5,15 @@
{
"key1": null,
{1: 2, {}: 4}: 12,
"key2": [
{},
{ },
[],
[ /* empty array */ ],
[ // empty array
],
-1e10 // another comment
@@ -19,4 +22,5 @@
"key3": false
}
// [{}, {899: 12, "x": "y"}, [], 123, 89, 89, "xyz", null, [], [], [1, 2, 3]]

View File

@@ -1,7 +1,6 @@
For version 0.8.10
==================
- accurate file/line information
- exception propagation across DLLs
- fix exception handling
- fix implicit generic routines
@@ -74,7 +73,6 @@ Library
- bignums
- ftp, smtp (and other internet protocols)
- finish json module: use coroutines for this!
- pdcurses bindings
- queues additional to streams: have two positions (read/write) instead of one

View File

@@ -33,7 +33,8 @@ srcdoc: "impure/zipfiles;pure/xmlgen;pure/macros;pure/parseutils;pure/browsers"
srcdoc: "impure/db_postgres;impure/db_mysql;impure/db_sqlite"
srcdoc: "pure/httpserver;pure/httpclient"
srcdoc: "pure/ropes;pure/unidecode/unidecode;pure/xmldom;pure/xmldomparser"
srcdoc: "pure/xmlparser;pure/htmlparser;pure/xmltree;pure/colors;impure/graphics"
srcdoc: "pure/xmlparser;pure/htmlparser;pure/xmltree;pure/colors"
srcdoc: "pure/json;impure/graphics"
webdoc: "wrappers/libcurl;pure/md5;wrappers/mysql;wrappers/iup"
webdoc: "wrappers/sqlite3;wrappers/postgres;wrappers/tinyc"