StringStream and parseJson, parseCfg, parseSql et al for the vm (#10746)

Author: Arne Döring
Date: 2019-02-28 22:57:57 +01:00
Committer: Andreas Rumpf
Parent: 728ff1004a
Commit: 1102b8ac6e
15 changed files with 354 additions and 403 deletions
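
All of the lexer hunks below follow one pattern: the lexer buffer changes from a manually managed cstring (plus a separate bufLen field) to a plain string, so the old optimization of caching the buffer in a local "register" (var buf = L.buf) is dropped and every access reads L.buf[pos] directly. For a cstring the local was a cheap alias that had to be re-fetched after every buffer refill; for a string it would be a full copy that silently goes stale. Working on value-based string buffers is what lets these lexers, and the parsers built on them, run inside the VM. A minimal before/after sketch (illustrative, not from the diff; assumes the buffer ends in a non-space sentinel, as the real lexers guarantee):

type
  MiniLexer = object
    buf: string
    bufpos: int

proc skipSpacesOld(L: var MiniLexer) =
  var pos = L.bufpos
  var buf = L.buf                   # cheap alias for a cstring; a full copy for a string
  while buf[pos] == ' ': inc pos    # goes stale if fillBuffer reallocates meanwhile
  L.bufpos = pos

proc skipSpacesNew(L: var MiniLexer) =
  var pos = L.bufpos
  while L.buf[pos] == ' ': inc pos  # always reads the current buffer
  L.bufpos = pos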


@@ -145,38 +145,34 @@ proc atEndMark(buf: cstring, pos: int): bool =
proc readVerbatimSection(L: var TBaseLexer): Rope =
var pos = L.bufpos
var buf = L.buf
var r = newStringOfCap(30_000)
while true:
case buf[pos]
case L.buf[pos]
of CR:
pos = nimlexbase.handleCR(L, pos)
buf = L.buf
r.add('\L')
of LF:
pos = nimlexbase.handleLF(L, pos)
buf = L.buf
r.add('\L')
of '\0':
doAssert(false, "ccgmerge: expected: " & NimMergeEndMark)
break
else:
if atEndMark(buf, pos):
if atEndMark(L.buf, pos):
inc pos, NimMergeEndMark.len
break
r.add(buf[pos])
r.add(L.buf[pos])
inc pos
L.bufpos = pos
result = r.rope
proc readKey(L: var TBaseLexer, result: var string) =
var pos = L.bufpos
var buf = L.buf
setLen(result, 0)
while buf[pos] in IdentChars:
result.add(buf[pos])
while L.buf[pos] in IdentChars:
result.add(L.buf[pos])
inc pos
if buf[pos] != ':': doAssert(false, "ccgmerge: ':' expected")
if L.buf[pos] != ':': doAssert(false, "ccgmerge: ':' expected")
L.bufpos = pos + 1 # skip ':'
proc newFakeType(id: int): PType =


@@ -318,17 +318,16 @@ template eatChar(L: var TLexer, t: var TToken) =
proc getNumber(L: var TLexer, result: var TToken) =
proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]): Natural =
var pos = L.bufpos # use registers for pos, buf
var buf = L.buf
result = 0
while true:
if buf[pos] in chars:
add(tok.literal, buf[pos])
if L.buf[pos] in chars:
add(tok.literal, L.buf[pos])
inc(pos)
inc(result)
else:
break
if buf[pos] == '_':
if buf[pos+1] notin chars:
if L.buf[pos] == '_':
if L.buf[pos+1] notin chars:
lexMessage(L, errGenerated,
"only single underscores may occur in a token and token may not " &
"end with an underscore: e.g. '1__1' and '1_' are invalid")
@@ -339,9 +338,8 @@ proc getNumber(L: var TLexer, result: var TToken) =
proc matchChars(L: var TLexer, tok: var TToken, chars: set[char]) =
var pos = L.bufpos # use registers for pos, buf
var buf = L.buf
while buf[pos] in chars:
add(tok.literal, buf[pos])
while L.buf[pos] in chars:
add(tok.literal, L.buf[pos])
inc(pos)
L.bufpos = pos
@@ -800,25 +798,23 @@ type
proc getString(L: var TLexer, tok: var TToken, mode: StringMode) =
var pos = L.bufpos
var buf = L.buf # put `buf` in a register
var line = L.lineNumber # save linenumber for better error message
tokenBegin(tok, pos - ord(mode == raw))
inc pos # skip "
if buf[pos] == '\"' and buf[pos+1] == '\"':
if L.buf[pos] == '\"' and L.buf[pos+1] == '\"':
tok.tokType = tkTripleStrLit # long string literal:
inc(pos, 2) # skip ""
# skip leading newline:
if buf[pos] in {' ', '\t'}:
if L.buf[pos] in {' ', '\t'}:
var newpos = pos+1
while buf[newpos] in {' ', '\t'}: inc newpos
if buf[newpos] in {CR, LF}: pos = newpos
while L.buf[newpos] in {' ', '\t'}: inc newpos
if L.buf[newpos] in {CR, LF}: pos = newpos
pos = handleCRLF(L, pos)
buf = L.buf
while true:
case buf[pos]
case L.buf[pos]
of '\"':
if buf[pos+1] == '\"' and buf[pos+2] == '\"' and
buf[pos+3] != '\"':
if L.buf[pos+1] == '\"' and L.buf[pos+2] == '\"' and
L.buf[pos+3] != '\"':
tokenEndIgnore(tok, pos+2)
L.bufpos = pos + 3 # skip the three """
break
@@ -827,7 +823,6 @@ proc getString(L: var TLexer, tok: var TToken, mode: StringMode) =
of CR, LF:
tokenEndIgnore(tok, pos)
pos = handleCRLF(L, pos)
buf = L.buf
add(tok.literal, "\n")
of nimlexbase.EndOfFile:
tokenEndIgnore(tok, pos)
@@ -838,16 +833,16 @@ proc getString(L: var TLexer, tok: var TToken, mode: StringMode) =
L.bufpos = pos
break
else:
add(tok.literal, buf[pos])
add(tok.literal, L.buf[pos])
inc(pos)
else:
# ordinary string literal
if mode != normal: tok.tokType = tkRStrLit
else: tok.tokType = tkStrLit
while true:
var c = buf[pos]
var c = L.buf[pos]
if c == '\"':
if mode != normal and buf[pos+1] == '\"':
if mode != normal and L.buf[pos+1] == '\"':
inc(pos, 2)
add(tok.literal, '"')
else:
@@ -885,10 +880,9 @@ proc getCharacter(L: var TLexer, tok: var TToken) =
proc getSymbol(L: var TLexer, tok: var TToken) =
var h: Hash = 0
var pos = L.bufpos
var buf = L.buf
tokenBegin(tok, pos)
while true:
var c = buf[pos]
var c = L.buf[pos]
case c
of 'a'..'z', '0'..'9', '\x80'..'\xFF':
h = h !& ord(c)
@@ -898,7 +892,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
h = h !& ord(c)
inc(pos)
of '_':
if buf[pos+1] notin SymChars:
if L.buf[pos+1] notin SymChars:
lexMessage(L, errGenerated, "invalid token: trailing underscore")
break
inc(pos)
@@ -923,11 +917,10 @@ proc endOperator(L: var TLexer, tok: var TToken, pos: int,
proc getOperator(L: var TLexer, tok: var TToken) =
var pos = L.bufpos
var buf = L.buf
tokenBegin(tok, pos)
var h: Hash = 0
while true:
var c = buf[pos]
var c = L.buf[pos]
if c notin OpChars: break
h = h !& ord(c)
inc(pos)
@@ -936,10 +929,10 @@ proc getOperator(L: var TLexer, tok: var TToken) =
# advance pos but don't store it in L.bufpos so the next token (which might
# be an operator too) gets the preceding spaces:
tok.strongSpaceB = 0
while buf[pos] == ' ':
while L.buf[pos] == ' ':
inc pos
inc tok.strongSpaceB
if buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
if L.buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
tok.strongSpaceB = -1
proc getPrecedence*(tok: TToken, strongSpaces: bool): int =
@@ -980,9 +973,8 @@ proc getPrecedence*(tok: TToken, strongSpaces: bool): int =
proc newlineFollows*(L: TLexer): bool =
var pos = L.bufpos
var buf = L.buf
while true:
case buf[pos]
case L.buf[pos]
of ' ', '\t':
inc(pos)
of CR, LF:
@@ -990,49 +982,47 @@ proc newlineFollows*(L: TLexer): bool =
break
of '#':
inc(pos)
if buf[pos] == '#': inc(pos)
if buf[pos] != '[': return true
if L.buf[pos] == '#': inc(pos)
if L.buf[pos] != '[': return true
else:
break
proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
isDoc: bool) =
var pos = start
var buf = L.buf
var toStrip = 0
tokenBegin(tok, pos)
# detect the amount of indentation:
if isDoc:
toStrip = getColNumber(L, pos)
while buf[pos] == ' ': inc pos
if buf[pos] in {CR, LF}:
while L.buf[pos] == ' ': inc pos
if L.buf[pos] in {CR, LF}:
pos = handleCRLF(L, pos)
buf = L.buf
toStrip = 0
while buf[pos] == ' ':
while L.buf[pos] == ' ':
inc pos
inc toStrip
var nesting = 0
while true:
case buf[pos]
case L.buf[pos]
of '#':
if isDoc:
if buf[pos+1] == '#' and buf[pos+2] == '[':
if L.buf[pos+1] == '#' and L.buf[pos+2] == '[':
inc nesting
tok.literal.add '#'
elif buf[pos+1] == '[':
elif L.buf[pos+1] == '[':
inc nesting
inc pos
of ']':
if isDoc:
if buf[pos+1] == '#' and buf[pos+2] == '#':
if L.buf[pos+1] == '#' and L.buf[pos+2] == '#':
if nesting == 0:
tokenEndIgnore(tok, pos+2)
inc(pos, 3)
break
dec nesting
tok.literal.add ']'
elif buf[pos+1] == '#':
elif L.buf[pos+1] == '#':
if nesting == 0:
tokenEndIgnore(tok, pos+1)
inc(pos, 2)
@@ -1042,14 +1032,13 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
of CR, LF:
tokenEndIgnore(tok, pos)
pos = handleCRLF(L, pos)
buf = L.buf
# strip leading whitespace:
when defined(nimpretty): tok.literal.add "\L"
if isDoc:
when not defined(nimpretty): tok.literal.add "\n"
inc tok.iNumber
var c = toStrip
while buf[pos] == ' ' and c > 0:
while L.buf[pos] == ' ' and c > 0:
inc pos
dec c
of nimlexbase.EndOfFile:
@@ -1057,7 +1046,7 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
lexMessagePos(L, errGenerated, pos, "end of multiline comment expected")
break
else:
if isDoc or defined(nimpretty): tok.literal.add buf[pos]
if isDoc or defined(nimpretty): tok.literal.add L.buf[pos]
inc(pos)
L.bufpos = pos
when defined(nimpretty):
@@ -1065,49 +1054,47 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
proc scanComment(L: var TLexer, tok: var TToken) =
var pos = L.bufpos
var buf = L.buf
tok.tokType = tkComment
# iNumber contains the number of '\n' in the token
tok.iNumber = 0
assert buf[pos+1] == '#'
assert L.buf[pos+1] == '#'
when defined(nimpretty):
tok.commentOffsetA = L.offsetBase + pos - 1
if buf[pos+2] == '[':
if L.buf[pos+2] == '[':
skipMultiLineComment(L, tok, pos+3, true)
return
tokenBegin(tok, pos)
inc(pos, 2)
var toStrip = 0
while buf[pos] == ' ':
while L.buf[pos] == ' ':
inc pos
inc toStrip
while true:
var lastBackslash = -1
while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
if buf[pos] == '\\': lastBackslash = pos+1
add(tok.literal, buf[pos])
while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
if L.buf[pos] == '\\': lastBackslash = pos+1
add(tok.literal, L.buf[pos])
inc(pos)
tokenEndIgnore(tok, pos)
pos = handleCRLF(L, pos)
buf = L.buf
var indent = 0
while buf[pos] == ' ':
while L.buf[pos] == ' ':
inc(pos)
inc(indent)
if buf[pos] == '#' and buf[pos+1] == '#':
if L.buf[pos] == '#' and L.buf[pos+1] == '#':
tok.literal.add "\n"
inc(pos, 2)
var c = toStrip
while buf[pos] == ' ' and c > 0:
while L.buf[pos] == ' ' and c > 0:
inc pos
dec c
inc tok.iNumber
else:
if buf[pos] > ' ':
if L.buf[pos] > ' ':
L.indentAhead = indent
tokenEndIgnore(tok, pos)
break
@@ -1117,7 +1104,6 @@ proc scanComment(L: var TLexer, tok: var TToken) =
proc skip(L: var TLexer, tok: var TToken) =
var pos = L.bufpos
var buf = L.buf
tokenBegin(tok, pos)
tok.strongSpaceA = 0
when defined(nimpretty):
@@ -1127,7 +1113,7 @@ proc skip(L: var TLexer, tok: var TToken) =
tok.commentOffsetB = tok.commentOffsetA
tok.line = -1
while true:
case buf[pos]
case L.buf[pos]
of ' ':
inc(pos)
inc(tok.strongSpaceA)
@@ -1137,13 +1123,12 @@ proc skip(L: var TLexer, tok: var TToken) =
of CR, LF:
tokenEndPrevious(tok, pos)
pos = handleCRLF(L, pos)
buf = L.buf
var indent = 0
while true:
if buf[pos] == ' ':
if L.buf[pos] == ' ':
inc(pos)
inc(indent)
elif buf[pos] == '#' and buf[pos+1] == '[':
elif L.buf[pos] == '#' and L.buf[pos+1] == '[':
when defined(nimpretty):
hasComment = true
if tok.line < 0:
@@ -1151,32 +1136,30 @@ proc skip(L: var TLexer, tok: var TToken) =
commentIndent = indent
skipMultiLineComment(L, tok, pos+2, false)
pos = L.bufpos
buf = L.buf
else:
break
tok.strongSpaceA = 0
when defined(nimpretty):
if buf[pos] == '#' and tok.line < 0: commentIndent = indent
if buf[pos] > ' ' and (buf[pos] != '#' or buf[pos+1] == '#'):
if L.buf[pos] == '#' and tok.line < 0: commentIndent = indent
if L.buf[pos] > ' ' and (L.buf[pos] != '#' or L.buf[pos+1] == '#'):
tok.indent = indent
L.currLineIndent = indent
break
of '#':
# do not skip documentation comment:
if buf[pos+1] == '#': break
if L.buf[pos+1] == '#': break
when defined(nimpretty):
hasComment = true
if tok.line < 0:
tok.line = L.lineNumber
if buf[pos+1] == '[':
if L.buf[pos+1] == '[':
skipMultiLineComment(L, tok, pos+2, false)
pos = L.bufpos
buf = L.buf
else:
tokenBegin(tok, pos)
while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
when defined(nimpretty): tok.literal.add buf[pos]
while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
when defined(nimpretty): tok.literal.add L.buf[pos]
inc(pos)
tokenEndIgnore(tok, pos+1)
when defined(nimpretty):


@@ -39,8 +39,7 @@ const
type
TBaseLexer* = object of RootObj
bufpos*: int
buf*: cstring
bufLen*: int # length of buffer in characters
buf*: string
stream*: PLLStream # we read from this stream
lineNumber*: int # the current line number
# private data:
@@ -65,11 +64,7 @@ proc handleLF*(L: var TBaseLexer, pos: int): int
# of the LF.
# implementation
const
chrSize = sizeof(char)
proc closeBaseLexer(L: var TBaseLexer) =
dealloc(L.buf)
llStreamClose(L.stream)
proc fillBuffer(L: var TBaseLexer) =
@@ -80,14 +75,13 @@ proc fillBuffer(L: var TBaseLexer) =
oldBufLen: int
# we know here that pos == L.sentinel, but not if this proc
# is called the first time by initBaseLexer()
assert(L.sentinel < L.bufLen)
toCopy = L.bufLen - L.sentinel - 1
assert(L.sentinel < L.buf.len)
toCopy = L.buf.len - L.sentinel - 1
assert(toCopy >= 0)
if toCopy > 0:
moveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize)
moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy)
# "moveMem" handles overlapping regions
charsRead = llStreamRead(L.stream, addr(L.buf[toCopy]),
(L.sentinel + 1) * chrSize) div chrSize
charsRead = llStreamRead(L.stream, addr L.buf[toCopy], L.sentinel + 1)
s = toCopy + charsRead
if charsRead < L.sentinel + 1:
L.buf[s] = EndOfFile # set end marker
@@ -96,7 +90,7 @@ proc fillBuffer(L: var TBaseLexer) =
# compute sentinel:
dec(s) # BUGFIX (valgrind)
while true:
assert(s < L.bufLen)
assert(s < L.buf.len)
while (s >= 0) and not (L.buf[s] in NewLines): dec(s)
if s >= 0:
# we found an appropriate character for a sentinel:
@@ -105,17 +99,16 @@ proc fillBuffer(L: var TBaseLexer) =
else:
# rather than to give up here because the line is too long,
# double the buffer's size and try again:
oldBufLen = L.bufLen
L.bufLen = L.bufLen * 2
L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize))
assert(L.bufLen - oldBufLen == oldBufLen)
oldBufLen = L.buf.len
L.buf.setLen(L.buf.len * 2)
assert(L.buf.len - oldBufLen == oldBufLen)
charsRead = llStreamRead(L.stream, addr(L.buf[oldBufLen]),
oldBufLen * chrSize) div chrSize
oldBufLen)
if charsRead < oldBufLen:
L.buf[oldBufLen + charsRead] = EndOfFile
L.sentinel = oldBufLen + charsRead
break
s = L.bufLen - 1
s = L.buf.len - 1
proc fillBaseLexer(L: var TBaseLexer, pos: int): int =
assert(pos <= L.sentinel)
@@ -149,8 +142,7 @@ proc openBaseLexer(L: var TBaseLexer, inputstream: PLLStream, bufLen = 8192) =
assert(bufLen > 0)
L.bufpos = 0
L.offsetBase = 0
L.bufLen = bufLen
L.buf = cast[cstring](alloc(bufLen * chrSize))
L.buf = newString(bufLen)
L.sentinel = bufLen - 1
L.lineStart = 0
L.lineNumber = 1 # lines start at 1
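
Here TBaseLexer's buffer becomes a plain string: the alloc/realloc/dealloc calls and the separate bufLen field disappear because the string tracks its own length. A tiny sketch of the growth step whose invariant the assert above checks:

var buf = newString(8192)     # what openBaseLexer now does
let oldBufLen = buf.len
buf.setLen(buf.len * 2)       # double when no newline fits for a sentinel
doAssert buf.len - oldBufLen == oldBufLen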


@@ -2039,6 +2039,8 @@ proc gen(c: PCtx; n: PNode; dest: var TDest; flags: TGenFlags = {}) =
genConv(c, n, n.sons[1], dest)
of nkObjDownConv:
genConv(c, n, n.sons[0], dest)
of nkObjUpConv:
genConv(c, n, n.sons[0], dest)
of nkVarSection, nkLetSection:
unused(c, n, dest)
genVarSection(c, n)
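
The two added lines teach the VM code generator to handle nkObjUpConv the same way as the other conversion nodes. A hedged illustration of the kind of code this enables at compile time (not taken from the commit):

type
  Base = ref object of RootObj
  Sub = ref object of Base

static:
  let s = Sub()
  let b = Base(s)       # object up-conversion (nkObjUpConv)
  doAssert b of Sub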


@@ -155,18 +155,17 @@ proc getAdornment(L: var Lexer, tok: var Token) =
proc getIndentAux(L: var Lexer, start: int): int =
var pos = start
var buf = L.buf
# skip the newline (but include it in the token!)
if buf[pos] == '\x0D':
if buf[pos + 1] == '\x0A': inc(pos, 2)
if L.buf[pos] == '\x0D':
if L.buf[pos + 1] == '\x0A': inc(pos, 2)
else: inc(pos)
elif buf[pos] == '\x0A':
elif L.buf[pos] == '\x0A':
inc(pos)
if L.skipPounds:
if buf[pos] == '#': inc(pos)
if buf[pos] == '#': inc(pos)
if L.buf[pos] == '#': inc(pos)
if L.buf[pos] == '#': inc(pos)
while true:
case buf[pos]
case L.buf[pos]
of ' ', '\x0B', '\x0C':
inc(pos)
inc(result)
@@ -175,9 +174,9 @@ proc getIndentAux(L: var Lexer, start: int): int =
result = result - (result mod 8) + 8
else:
break # EndOfFile also leaves the loop
if buf[pos] == '\0':
if L.buf[pos] == '\0':
result = 0
elif (buf[pos] == '\x0A') or (buf[pos] == '\x0D'):
elif (L.buf[pos] == '\x0A') or (L.buf[pos] == '\x0D'):
# look at the next line for proper indentation:
result = getIndentAux(L, pos)
L.bufpos = pos # no need to set back buf


@@ -28,11 +28,7 @@ type
BaseLexer* = object of RootObj ## the base lexer. Inherit your lexer from
## this object.
bufpos*: int ## the current position within the buffer
when defined(js): ## the buffer itself
buf*: string
else:
buf*: cstring
bufLen*: int ## length of buffer in characters
buf*: string ## the buffer itself
input: Stream ## the input stream
lineNumber*: int ## the current line number
sentinel: int
@@ -40,13 +36,8 @@ type
offsetBase*: int # use ``offsetBase + bufpos`` to get the offset
refillChars: set[char]
const
chrSize = sizeof(char)
proc close*(L: var BaseLexer) =
## closes the base lexer. This closes `L`'s associated stream too.
when not defined(js):
dealloc(L.buf)
close(L.input)
proc fillBuffer(L: var BaseLexer) =
@@ -57,17 +48,21 @@ proc fillBuffer(L: var BaseLexer) =
oldBufLen: int
# we know here that pos == L.sentinel, but not if this proc
# is called the first time by initBaseLexer()
assert(L.sentinel < L.bufLen)
toCopy = L.bufLen - L.sentinel - 1
assert(L.sentinel + 1 <= L.buf.len)
toCopy = L.buf.len - (L.sentinel + 1)
assert(toCopy >= 0)
if toCopy > 0:
when defined(js):
for i in 0 ..< toCopy: L.buf[i] = L.buf[L.sentinel + 1 + i]
for i in 0 ..< toCopy:
L.buf[i] = L.buf[L.sentinel + 1 + i]
else:
# "moveMem" handles overlapping regions
moveMem(L.buf, addr L.buf[L.sentinel + 1], toCopy * chrSize)
charsRead = readData(L.input, addr(L.buf[toCopy]),
(L.sentinel + 1) * chrSize) div chrSize
when nimvm:
for i in 0 ..< toCopy:
L.buf[i] = L.buf[L.sentinel + 1 + i]
else:
# "moveMem" handles overlapping regions
moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy)
charsRead = L.input.readDataStr(L.buf, toCopy ..< toCopy + L.sentinel + 1)
s = toCopy + charsRead
if charsRead < L.sentinel + 1:
L.buf[s] = EndOfFile # set end marker
@@ -76,7 +71,7 @@ proc fillBuffer(L: var BaseLexer) =
# compute sentinel:
dec(s) # BUGFIX (valgrind)
while true:
assert(s < L.bufLen)
assert(s < L.buf.len)
while s >= 0 and L.buf[s] notin L.refillChars: dec(s)
if s >= 0:
# we found an appropriate character for a sentinel:
@@ -85,20 +80,14 @@ proc fillBuffer(L: var BaseLexer) =
else:
# rather than to give up here because the line is too long,
# double the buffer's size and try again:
oldBufLen = L.bufLen
L.bufLen = L.bufLen * 2
when defined(js):
L.buf.setLen(L.bufLen)
else:
L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize))
assert(L.bufLen - oldBufLen == oldBufLen)
charsRead = readData(L.input, addr(L.buf[oldBufLen]),
oldBufLen * chrSize) div chrSize
oldBufLen = L.buf.len
L.buf.setLen(L.buf.len * 2)
charsRead = readDataStr(L.input, L.buf, oldBufLen ..< L.buf.len)
if charsRead < oldBufLen:
L.buf[oldBufLen + charsRead] = EndOfFile
L.sentinel = oldBufLen + charsRead
break
s = L.bufLen - 1
s = L.buf.len - 1
proc fillBaseLexer(L: var BaseLexer, pos: int): int =
assert(pos <= L.sentinel)
@@ -148,12 +137,8 @@ proc open*(L: var BaseLexer, input: Stream, bufLen: int = 8192;
L.input = input
L.bufpos = 0
L.offsetBase = 0
L.bufLen = bufLen
L.refillChars = refillChars
when defined(js):
L.buf = newString(bufLen)
else:
L.buf = cast[cstring](alloc(bufLen * chrSize))
L.buf = newString(bufLen)
L.sentinel = bufLen - 1
L.lineStart = 0
L.lineNumber = 1 # lines start at 1
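
lexbase previously maintained separate code paths for C (cstring with manual allocation) and JS (string); the string buffer unifies them, and a when nimvm branch handles compile-time execution, where addr/moveMem on string data are unavailable. A minimal sketch of that dispatch pattern (an assumption-laden stand-in, not the stdlib proc):

proc shiftLeft(s: var string; src, len: int) =
  ## moves s[src ..< src+len] to the front of s
  when nimvm:
    for i in 0 ..< len:       # no addr/moveMem in the VM
      s[i] = s[src + i]
  else:
    if len > 0:
      # "moveMem" handles overlapping regions
      moveMem(addr s[0], addr s[src], len)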


@@ -261,35 +261,32 @@ proc handleCRLF(c: var CfgParser, pos: int): int =
proc getString(c: var CfgParser, tok: var Token, rawMode: bool) =
var pos = c.bufpos + 1 # skip "
var buf = c.buf # put `buf` in a register
tok.kind = tkSymbol
if (buf[pos] == '"') and (buf[pos + 1] == '"'):
if (c.buf[pos] == '"') and (c.buf[pos + 1] == '"'):
# long string literal:
inc(pos, 2) # skip ""
# skip leading newline:
pos = handleCRLF(c, pos)
buf = c.buf
while true:
case buf[pos]
case c.buf[pos]
of '"':
if (buf[pos + 1] == '"') and (buf[pos + 2] == '"'): break
if (c.buf[pos + 1] == '"') and (c.buf[pos + 2] == '"'): break
add(tok.literal, '"')
inc(pos)
of '\c', '\L':
pos = handleCRLF(c, pos)
buf = c.buf
add(tok.literal, "\n")
of lexbase.EndOfFile:
tok.kind = tkInvalid
break
else:
add(tok.literal, buf[pos])
add(tok.literal, c.buf[pos])
inc(pos)
c.bufpos = pos + 3 # skip the three """
else:
# ordinary string literal
while true:
var ch = buf[pos]
var ch = c.buf[pos]
if ch == '"':
inc(pos) # skip '"'
break
@@ -307,26 +304,23 @@ proc getString(c: var CfgParser, tok: var Token, rawMode: bool) =
proc getSymbol(c: var CfgParser, tok: var Token) =
var pos = c.bufpos
var buf = c.buf
while true:
add(tok.literal, buf[pos])
add(tok.literal, c.buf[pos])
inc(pos)
if not (buf[pos] in SymChars): break
if not (c.buf[pos] in SymChars): break
c.bufpos = pos
tok.kind = tkSymbol
proc skip(c: var CfgParser) =
var pos = c.bufpos
var buf = c.buf
while true:
case buf[pos]
case c.buf[pos]
of ' ', '\t':
inc(pos)
of '#', ';':
while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos)
while not (c.buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos)
of '\c', '\L':
pos = handleCRLF(c, pos)
buf = c.buf
else:
break # EndOfFile also leaves the loop
c.bufpos = pos
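
With the lexer core VM-safe, parsecfg is one of the parsers the commit title promises for compile time. A hedged usage sketch:

import parsecfg, streams

static:
  var p: CfgParser
  open(p, newStringStream("key=value\n"), "example.cfg")
  let e = next(p)
  doAssert e.kind == cfgKeyValuePair and e.key == "key" and e.value == "value"
  close(p)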


@@ -156,44 +156,41 @@ proc open*(my: var CsvParser, filename: string,
proc parseField(my: var CsvParser, a: var string) =
var pos = my.bufpos
var buf = my.buf
if my.skipWhite:
while buf[pos] in {' ', '\t'}: inc(pos)
while my.buf[pos] in {' ', '\t'}: inc(pos)
setLen(a, 0) # reuse memory
if buf[pos] == my.quote and my.quote != '\0':
if my.buf[pos] == my.quote and my.quote != '\0':
inc(pos)
while true:
let c = buf[pos]
let c = my.buf[pos]
if c == '\0':
my.bufpos = pos # can continue after exception?
error(my, pos, my.quote & " expected")
break
elif c == my.quote:
if my.esc == '\0' and buf[pos+1] == my.quote:
if my.esc == '\0' and my.buf[pos+1] == my.quote:
add(a, my.quote)
inc(pos, 2)
else:
inc(pos)
break
elif c == my.esc:
add(a, buf[pos+1])
add(a, my.buf[pos+1])
inc(pos, 2)
else:
case c
of '\c':
pos = handleCR(my, pos)
buf = my.buf
add(a, "\n")
of '\l':
pos = handleLF(my, pos)
buf = my.buf
add(a, "\n")
else:
add(a, c)
inc(pos)
else:
while true:
let c = buf[pos]
let c = my.buf[pos]
if c == my.sep: break
if c in {'\c', '\l', '\0'}: break
add(a, c)
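
parsecsv's parseField gets the same mechanical rewrite, so CSV parsing should work in the VM too. A hedged sketch:

import parsecsv, streams

static:
  var c: CsvParser
  open(c, newStringStream("a,b\n1,2\n"), "example.csv")
  doAssert readRow(c) and c.row == @["a", "b"]
  doAssert readRow(c) and c.row == @["1", "2"]
  close(c)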


@@ -182,11 +182,10 @@ proc parseEscapedUTF16*(buf: cstring, pos: var int): int =
proc parseString(my: var JsonParser): TokKind =
result = tkString
var pos = my.bufpos + 1
var buf = my.buf
if my.rawStringLiterals:
add(my.a, '"')
while true:
case buf[pos]
case my.buf[pos]
of '\0':
my.err = errQuoteExpected
result = tkError
@@ -199,9 +198,9 @@ proc parseString(my: var JsonParser): TokKind =
of '\\':
if my.rawStringLiterals:
add(my.a, '\\')
case buf[pos+1]
case my.buf[pos+1]
of '\\', '"', '\'', '/':
add(my.a, buf[pos+1])
add(my.a, my.buf[pos+1])
inc(pos, 2)
of 'b':
add(my.a, '\b')
@@ -223,17 +222,17 @@ proc parseString(my: var JsonParser): TokKind =
add(my.a, 'u')
inc(pos, 2)
var pos2 = pos
var r = parseEscapedUTF16(buf, pos)
var r = parseEscapedUTF16(my.buf, pos)
if r < 0:
my.err = errInvalidToken
break
# Deal with surrogates
if (r and 0xfc00) == 0xd800:
if buf[pos] != '\\' or buf[pos+1] != 'u':
if my.buf[pos] != '\\' or my.buf[pos+1] != 'u':
my.err = errInvalidToken
break
inc(pos, 2)
var s = parseEscapedUTF16(buf, pos)
var s = parseEscapedUTF16(my.buf, pos)
if (s and 0xfc00) == 0xdc00 and s > 0:
r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00))
else:
@@ -242,8 +241,8 @@ proc parseString(my: var JsonParser): TokKind =
if my.rawStringLiterals:
let length = pos - pos2
for i in 1 .. length:
if buf[pos2] in {'0'..'9', 'A'..'F', 'a'..'f'}:
add(my.a, buf[pos2])
if my.buf[pos2] in {'0'..'9', 'A'..'F', 'a'..'f'}:
add(my.a, my.buf[pos2])
inc pos2
else:
break
@@ -251,61 +250,54 @@ proc parseString(my: var JsonParser): TokKind =
add(my.a, toUTF8(Rune(r)))
else:
# don't bother with the error
add(my.a, buf[pos])
add(my.a, my.buf[pos])
inc(pos)
of '\c':
pos = lexbase.handleCR(my, pos)
buf = my.buf
add(my.a, '\c')
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
add(my.a, '\L')
else:
add(my.a, buf[pos])
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos # store back
proc skip(my: var JsonParser) =
var pos = my.bufpos
var buf = my.buf
while true:
case buf[pos]
case my.buf[pos]
of '/':
if buf[pos+1] == '/':
if my.buf[pos+1] == '/':
# skip line comment:
inc(pos, 2)
while true:
case buf[pos]
case my.buf[pos]
of '\0':
break
of '\c':
pos = lexbase.handleCR(my, pos)
buf = my.buf
break
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
break
else:
inc(pos)
elif buf[pos+1] == '*':
elif my.buf[pos+1] == '*':
# skip long comment:
inc(pos, 2)
while true:
case buf[pos]
case my.buf[pos]
of '\0':
my.err = errEOC_Expected
break
of '\c':
pos = lexbase.handleCR(my, pos)
buf = my.buf
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
of '*':
inc(pos)
if buf[pos] == '/':
if my.buf[pos] == '/':
inc(pos)
break
else:
@@ -316,51 +308,47 @@ proc skip(my: var JsonParser) =
inc(pos)
of '\c':
pos = lexbase.handleCR(my, pos)
buf = my.buf
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
else:
break
my.bufpos = pos
proc parseNumber(my: var JsonParser) =
var pos = my.bufpos
var buf = my.buf
if buf[pos] == '-':
if my.buf[pos] == '-':
add(my.a, '-')
inc(pos)
if buf[pos] == '.':
if my.buf[pos] == '.':
add(my.a, "0.")
inc(pos)
else:
while buf[pos] in Digits:
add(my.a, buf[pos])
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
if buf[pos] == '.':
if my.buf[pos] == '.':
add(my.a, '.')
inc(pos)
# digits after the dot:
while buf[pos] in Digits:
add(my.a, buf[pos])
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
if buf[pos] in {'E', 'e'}:
add(my.a, buf[pos])
if my.buf[pos] in {'E', 'e'}:
add(my.a, my.buf[pos])
inc(pos)
if buf[pos] in {'+', '-'}:
add(my.a, buf[pos])
if my.buf[pos] in {'+', '-'}:
add(my.a, my.buf[pos])
inc(pos)
while buf[pos] in Digits:
add(my.a, buf[pos])
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
proc parseName(my: var JsonParser) =
var pos = my.bufpos
var buf = my.buf
if buf[pos] in IdentStartChars:
while buf[pos] in IdentChars:
add(my.a, buf[pos])
if my.buf[pos] in IdentStartChars:
while my.buf[pos] in IdentChars:
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
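
As a worked check of the surrogate-pair arithmetic in parseString above: the escape sequence \ud83d\ude00 must decode to U+1F600 (values chosen for illustration):

static:
  let hi = 0xd83d              # high surrogate
  let lo = 0xde00              # low surrogate
  doAssert (hi and 0xfc00) == 0xd800 and (lo and 0xfc00) == 0xdc00
  doAssert 0x10000 + (((hi - 0xd800) shl 10) or (lo - 0xdc00)) == 0x1F600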


@@ -148,35 +148,33 @@ proc handleCRLF(c: var SqlLexer, pos: int): int =
proc skip(c: var SqlLexer) =
var pos = c.bufpos
var buf = c.buf
var nested = 0
while true:
case buf[pos]
case c.buf[pos]
of ' ', '\t':
inc(pos)
of '-':
if buf[pos+1] == '-':
while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos)
if c.buf[pos+1] == '-':
while not (c.buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos)
else:
break
of '/':
if buf[pos+1] == '*':
if c.buf[pos+1] == '*':
inc(pos,2)
while true:
case buf[pos]
case c.buf[pos]
of '\0': break
of '\c', '\L':
pos = handleCRLF(c, pos)
buf = c.buf
of '*':
if buf[pos+1] == '/':
if c.buf[pos+1] == '/':
inc(pos, 2)
if nested <= 0: break
dec(nested)
else:
inc(pos)
of '/':
if buf[pos+1] == '*':
if c.buf[pos+1] == '*':
inc(pos, 2)
inc(nested)
else:
@@ -185,21 +183,19 @@ proc skip(c: var SqlLexer) =
else: break
of '\c', '\L':
pos = handleCRLF(c, pos)
buf = c.buf
else:
break # EndOfFile also leaves the loop
c.bufpos = pos
proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) =
var pos = c.bufpos + 1
var buf = c.buf
tok.kind = kind
block parseLoop:
while true:
while true:
var ch = buf[pos]
var ch = c.buf[pos]
if ch == '\'':
if buf[pos+1] == '\'':
if c.buf[pos+1] == '\'':
inc(pos, 2)
add(tok.literal, '\'')
else:
@@ -221,30 +217,27 @@ proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) =
if c.lineNumber > line:
# a new line whitespace has been parsed, so we check if the string
# continues after the whitespace:
buf = c.buf # may have been reallocated
pos = c.bufpos
if buf[pos] == '\'': inc(pos)
if c.buf[pos] == '\'': inc(pos)
else: break parseLoop
else: break parseLoop
c.bufpos = pos
proc getDollarString(c: var SqlLexer, tok: var Token) =
var pos = c.bufpos + 1
var buf = c.buf
tok.kind = tkDollarQuotedConstant
var tag = "$"
while buf[pos] in IdentChars:
add(tag, buf[pos])
while c.buf[pos] in IdentChars:
add(tag, c.buf[pos])
inc(pos)
if buf[pos] == '$': inc(pos)
if c.buf[pos] == '$': inc(pos)
else:
tok.kind = tkInvalid
return
while true:
case buf[pos]
case c.buf[pos]
of '\c', '\L':
pos = handleCRLF(c, pos)
buf = c.buf
add(tok.literal, "\L")
of '\0':
tok.kind = tkInvalid
@@ -252,37 +245,35 @@ proc getDollarString(c: var SqlLexer, tok: var Token) =
of '$':
inc(pos)
var tag2 = "$"
while buf[pos] in IdentChars:
add(tag2, buf[pos])
while c.buf[pos] in IdentChars:
add(tag2, c.buf[pos])
inc(pos)
if buf[pos] == '$': inc(pos)
if c.buf[pos] == '$': inc(pos)
if tag2 == tag: break
add(tok.literal, tag2)
add(tok.literal, '$')
else:
add(tok.literal, buf[pos])
add(tok.literal, c.buf[pos])
inc(pos)
c.bufpos = pos
proc getSymbol(c: var SqlLexer, tok: var Token) =
var pos = c.bufpos
var buf = c.buf
while true:
add(tok.literal, buf[pos])
add(tok.literal, c.buf[pos])
inc(pos)
if buf[pos] notin {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}:
if c.buf[pos] notin {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}:
break
c.bufpos = pos
tok.kind = tkIdentifier
proc getQuotedIdentifier(c: var SqlLexer, tok: var Token, quote='\"') =
var pos = c.bufpos + 1
var buf = c.buf
tok.kind = tkQuotedIdentifier
while true:
var ch = buf[pos]
var ch = c.buf[pos]
if ch == quote:
if buf[pos+1] == quote:
if c.buf[pos+1] == quote:
inc(pos, 2)
add(tok.literal, quote)
else:
@@ -298,11 +289,10 @@ proc getQuotedIdentifier(c: var SqlLexer, tok: var Token, quote='\"') =
proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) =
var pos = c.bufpos + 1
var buf = c.buf
block parseLoop:
while true:
while true:
var ch = buf[pos]
var ch = c.buf[pos]
if ch in validChars:
add(tok.literal, ch)
inc(pos)
@@ -318,9 +308,8 @@ proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) =
if c.lineNumber > line:
# a new line whitespace has been parsed, so we check if the string
# continues after the whitespace:
buf = c.buf # may have been reallocated
pos = c.bufpos
if buf[pos] == '\'': inc(pos)
if c.buf[pos] == '\'': inc(pos)
else: break parseLoop
else: break parseLoop
c.bufpos = pos
@@ -328,29 +317,28 @@ proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) =
proc getNumeric(c: var SqlLexer, tok: var Token) =
tok.kind = tkInteger
var pos = c.bufpos
var buf = c.buf
while buf[pos] in Digits:
add(tok.literal, buf[pos])
while c.buf[pos] in Digits:
add(tok.literal, c.buf[pos])
inc(pos)
if buf[pos] == '.':
if c.buf[pos] == '.':
tok.kind = tkNumeric
add(tok.literal, buf[pos])
add(tok.literal, c.buf[pos])
inc(pos)
while buf[pos] in Digits:
add(tok.literal, buf[pos])
while c.buf[pos] in Digits:
add(tok.literal, c.buf[pos])
inc(pos)
if buf[pos] in {'E', 'e'}:
if c.buf[pos] in {'E', 'e'}:
tok.kind = tkNumeric
add(tok.literal, buf[pos])
add(tok.literal, c.buf[pos])
inc(pos)
if buf[pos] == '+':
if c.buf[pos] == '+':
inc(pos)
elif buf[pos] == '-':
add(tok.literal, buf[pos])
elif c.buf[pos] == '-':
add(tok.literal, c.buf[pos])
inc(pos)
if buf[pos] in Digits:
while buf[pos] in Digits:
add(tok.literal, buf[pos])
if c.buf[pos] in Digits:
while c.buf[pos] in Digits:
add(tok.literal, c.buf[pos])
inc(pos)
else:
tok.kind = tkInvalid
@@ -361,24 +349,23 @@ proc getOperator(c: var SqlLexer, tok: var Token) =
'^', '&', '|', '`', '?'}
tok.kind = tkOperator
var pos = c.bufpos
var buf = c.buf
var trailingPlusMinus = false
while true:
case buf[pos]
case c.buf[pos]
of '-':
if buf[pos] == '-': break
if not trailingPlusMinus and buf[pos+1] notin operators and
if c.buf[pos] == '-': break
if not trailingPlusMinus and c.buf[pos+1] notin operators and
tok.literal.len > 0: break
of '/':
if buf[pos] == '*': break
if c.buf[pos] == '*': break
of '~', '!', '@', '#', '%', '^', '&', '|', '`', '?':
trailingPlusMinus = true
of '+':
if not trailingPlusMinus and buf[pos+1] notin operators and
if not trailingPlusMinus and c.buf[pos+1] notin operators and
tok.literal.len > 0: break
of '*', '<', '>', '=': discard
else: break
add(tok.literal, buf[pos])
add(tok.literal, c.buf[pos])
inc(pos)
c.bufpos = pos
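
parsesql sits on this lexer, so SQL parsing becomes a candidate for compile-time evaluation as well. A hedged sketch, assuming the stream-based parseSql entry point:

import parsesql, streams

static:
  let ast = parseSql(newStringStream("SELECT foo FROM bar;"), "example.sql")
  doAssert ast.len == 1        # one statement in the parsed list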


@@ -345,11 +345,10 @@ proc markError(my: var XmlParser, kind: XmlErrorKind) {.inline.} =
proc parseCDATA(my: var XmlParser) =
var pos = my.bufpos + len("<![CDATA[")
var buf = my.buf
while true:
case buf[pos]
case my.buf[pos]
of ']':
if buf[pos+1] == ']' and buf[pos+2] == '>':
if my.buf[pos+1] == ']' and my.buf[pos+2] == '>':
inc(pos, 3)
break
add(my.a, ']')
@@ -359,29 +358,25 @@ proc parseCDATA(my: var XmlParser) =
break
of '\c':
pos = lexbase.handleCR(my, pos)
buf = my.buf
add(my.a, '\L')
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
add(my.a, '\L')
of '/':
pos = lexbase.handleRefillChar(my, pos)
buf = my.buf
add(my.a, '/')
else:
add(my.a, buf[pos])
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos # store back
my.kind = xmlCData
proc parseComment(my: var XmlParser) =
var pos = my.bufpos + len("<!--")
var buf = my.buf
while true:
case buf[pos]
case my.buf[pos]
of '-':
if buf[pos+1] == '-' and buf[pos+2] == '>':
if my.buf[pos+1] == '-' and my.buf[pos+2] == '>':
inc(pos, 3)
break
if my.options.contains(reportComments): add(my.a, '-')
@@ -391,38 +386,32 @@ proc parseComment(my: var XmlParser) =
break
of '\c':
pos = lexbase.handleCR(my, pos)
buf = my.buf
if my.options.contains(reportComments): add(my.a, '\L')
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
if my.options.contains(reportComments): add(my.a, '\L')
of '/':
pos = lexbase.handleRefillChar(my, pos)
buf = my.buf
if my.options.contains(reportComments): add(my.a, '/')
else:
if my.options.contains(reportComments): add(my.a, buf[pos])
if my.options.contains(reportComments): add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
my.kind = xmlComment
proc parseWhitespace(my: var XmlParser, skip=false) =
var pos = my.bufpos
var buf = my.buf
while true:
case buf[pos]
case my.buf[pos]
of ' ', '\t':
if not skip: add(my.a, buf[pos])
if not skip: add(my.a, my.buf[pos])
inc(pos)
of '\c':
# the specification says that CR-LF, CR are to be transformed to LF
pos = lexbase.handleCR(my, pos)
buf = my.buf
if not skip: add(my.a, '\L')
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
if not skip: add(my.a, '\L')
else:
break
@@ -434,53 +423,51 @@ const
proc parseName(my: var XmlParser, dest: var string) =
var pos = my.bufpos
var buf = my.buf
if buf[pos] in NameStartChar:
if my.buf[pos] in NameStartChar:
while true:
add(dest, buf[pos])
add(dest, my.buf[pos])
inc(pos)
if buf[pos] notin NameChar: break
if my.buf[pos] notin NameChar: break
my.bufpos = pos
else:
markError(my, errNameExpected)
proc parseEntity(my: var XmlParser, dest: var string) =
var pos = my.bufpos+1
var buf = my.buf
my.kind = xmlCharData
if buf[pos] == '#':
if my.buf[pos] == '#':
var r: int
inc(pos)
if buf[pos] == 'x':
if my.buf[pos] == 'x':
inc(pos)
while true:
case buf[pos]
of '0'..'9': r = (r shl 4) or (ord(buf[pos]) - ord('0'))
of 'a'..'f': r = (r shl 4) or (ord(buf[pos]) - ord('a') + 10)
of 'A'..'F': r = (r shl 4) or (ord(buf[pos]) - ord('A') + 10)
case my.buf[pos]
of '0'..'9': r = (r shl 4) or (ord(my.buf[pos]) - ord('0'))
of 'a'..'f': r = (r shl 4) or (ord(my.buf[pos]) - ord('a') + 10)
of 'A'..'F': r = (r shl 4) or (ord(my.buf[pos]) - ord('A') + 10)
else: break
inc(pos)
else:
while buf[pos] in {'0'..'9'}:
r = r * 10 + (ord(buf[pos]) - ord('0'))
while my.buf[pos] in {'0'..'9'}:
r = r * 10 + (ord(my.buf[pos]) - ord('0'))
inc(pos)
add(dest, toUTF8(Rune(r)))
elif buf[pos] == 'l' and buf[pos+1] == 't' and buf[pos+2] == ';':
elif my.buf[pos] == 'l' and my.buf[pos+1] == 't' and my.buf[pos+2] == ';':
add(dest, '<')
inc(pos, 2)
elif buf[pos] == 'g' and buf[pos+1] == 't' and buf[pos+2] == ';':
elif my.buf[pos] == 'g' and my.buf[pos+1] == 't' and my.buf[pos+2] == ';':
add(dest, '>')
inc(pos, 2)
elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p' and
buf[pos+3] == ';':
elif my.buf[pos] == 'a' and my.buf[pos+1] == 'm' and my.buf[pos+2] == 'p' and
my.buf[pos+3] == ';':
add(dest, '&')
inc(pos, 3)
elif buf[pos] == 'a' and buf[pos+1] == 'p' and buf[pos+2] == 'o' and
buf[pos+3] == 's' and buf[pos+4] == ';':
elif my.buf[pos] == 'a' and my.buf[pos+1] == 'p' and my.buf[pos+2] == 'o' and
my.buf[pos+3] == 's' and my.buf[pos+4] == ';':
add(dest, '\'')
inc(pos, 4)
elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and
buf[pos+3] == 't' and buf[pos+4] == ';':
elif my.buf[pos] == 'q' and my.buf[pos+1] == 'u' and my.buf[pos+2] == 'o' and
my.buf[pos+3] == 't' and my.buf[pos+4] == ';':
add(dest, '"')
inc(pos, 4)
else:
@@ -491,7 +478,7 @@ proc parseEntity(my: var XmlParser, dest: var string) =
my.kind = xmlEntity
else:
add(dest, '&')
if buf[pos] == ';':
if my.buf[pos] == ';':
inc(pos)
else:
markError(my, errSemicolonExpected)
@@ -501,15 +488,14 @@ proc parsePI(my: var XmlParser) =
inc(my.bufpos, "<?".len)
parseName(my, my.a)
var pos = my.bufpos
var buf = my.buf
setLen(my.b, 0)
while true:
case buf[pos]
case my.buf[pos]
of '\0':
markError(my, errQmGtExpected)
break
of '?':
if buf[pos+1] == '>':
if my.buf[pos+1] == '>':
inc(pos, 2)
break
add(my.b, '?')
@@ -517,18 +503,15 @@ proc parsePI(my: var XmlParser) =
of '\c':
# the specification says that CR-LF, CR are to be transformed to LF
pos = lexbase.handleCR(my, pos)
buf = my.buf
add(my.b, '\L')
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
add(my.b, '\L')
of '/':
pos = lexbase.handleRefillChar(my, pos)
buf = my.buf
add(my.b, '/')
else:
add(my.b, buf[pos])
add(my.b, my.buf[pos])
inc(pos)
my.bufpos = pos
my.kind = xmlPI
@@ -536,10 +519,9 @@ proc parsePI(my: var XmlParser) =
proc parseSpecial(my: var XmlParser) =
# things that start with <!
var pos = my.bufpos + 2
var buf = my.buf
var opentags = 0
while true:
case buf[pos]
case my.buf[pos]
of '\0':
markError(my, errGtExpected)
break
@@ -556,18 +538,15 @@ proc parseSpecial(my: var XmlParser) =
add(my.a, '>')
of '\c':
pos = lexbase.handleCR(my, pos)
buf = my.buf
add(my.a, '\L')
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
add(my.a, '\L')
of '/':
pos = lexbase.handleRefillChar(my, pos)
buf = my.buf
add(my.b, '/')
else:
add(my.a, buf[pos])
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
my.kind = xmlSpecial
@@ -635,13 +614,12 @@ proc parseAttribute(my: var XmlParser) =
parseWhitespace(my, skip=true)
var pos = my.bufpos
var buf = my.buf
if buf[pos] in {'\'', '"'}:
var quote = buf[pos]
if my.buf[pos] in {'\'', '"'}:
var quote = my.buf[pos]
var pendingSpace = false
inc(pos)
while true:
case buf[pos]
case my.buf[pos]
of '\0':
markError(my, errQuoteExpected)
break
@@ -658,31 +636,28 @@ proc parseAttribute(my: var XmlParser) =
inc(pos)
of '\c':
pos = lexbase.handleCR(my, pos)
buf = my.buf
pendingSpace = true
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
pendingSpace = true
of '/':
pos = lexbase.handleRefillChar(my, pos)
buf = my.buf
add(my.b, '/')
else:
if buf[pos] == quote:
if my.buf[pos] == quote:
inc(pos)
break
else:
if pendingSpace:
add(my.b, ' ')
pendingSpace = false
add(my.b, buf[pos])
add(my.b, my.buf[pos])
inc(pos)
elif allowUnquotedAttribs in my.options:
const disallowedChars = {'"', '\'', '`', '=', '<', '>', ' ',
'\0', '\t', '\L', '\F', '\f'}
let startPos = pos
while (let c = buf[pos]; c notin disallowedChars):
while (let c = my.buf[pos]; c notin disallowedChars):
if c == '&':
my.bufpos = pos
parseEntity(my, my.b)
@@ -696,33 +671,29 @@ proc parseAttribute(my: var XmlParser) =
else:
markError(my, errQuoteExpected)
# error corrections: guess what was meant
while buf[pos] != '>' and buf[pos] > ' ':
add(my.b, buf[pos])
while my.buf[pos] != '>' and my.buf[pos] > ' ':
add(my.b, my.buf[pos])
inc pos
my.bufpos = pos
parseWhitespace(my, skip=true)
proc parseCharData(my: var XmlParser) =
var pos = my.bufpos
var buf = my.buf
while true:
case buf[pos]
case my.buf[pos]
of '\0', '<', '&': break
of '\c':
# the specification says that CR-LF, CR are to be transformed to LF
pos = lexbase.handleCR(my, pos)
buf = my.buf
add(my.a, '\L')
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
add(my.a, '\L')
of '/':
pos = lexbase.handleRefillChar(my, pos)
buf = my.buf
add(my.a, '/')
else:
add(my.a, buf[pos])
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
my.kind = xmlCharData
@@ -731,18 +702,17 @@ proc rawGetTok(my: var XmlParser) =
my.kind = xmlError
setLen(my.a, 0)
var pos = my.bufpos
var buf = my.buf
case buf[pos]
case my.buf[pos]
of '<':
case buf[pos+1]
case my.buf[pos+1]
of '/':
parseEndTag(my)
of '!':
if buf[pos+2] == '[' and buf[pos+3] == 'C' and buf[pos+4] == 'D' and
buf[pos+5] == 'A' and buf[pos+6] == 'T' and buf[pos+7] == 'A' and
buf[pos+8] == '[':
if my.buf[pos+2] == '[' and my.buf[pos+3] == 'C' and my.buf[pos+4] == 'D' and
my.buf[pos+5] == 'A' and my.buf[pos+6] == 'T' and my.buf[pos+7] == 'A' and
my.buf[pos+8] == '[':
parseCDATA(my)
elif buf[pos+2] == '-' and buf[pos+3] == '-':
elif my.buf[pos+2] == '-' and my.buf[pos+3] == '-':
parseComment(my)
else:
parseSpecial(my)
@@ -841,4 +811,3 @@ when not defined(testing) and isMainModule:
of xmlSpecial:
echo("SPECIAL: " & x.charData)
close(x)
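
Likewise for parsexml; a hedged compile-time sketch:

import parsexml, streams

static:
  var x: XmlParser
  open(x, newStringStream("<greeting>hi</greeting>"), "example.xml")
  x.next()
  doAssert x.kind == xmlElementStart and x.elementName == "greeting"
  x.close()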


@@ -1545,20 +1545,17 @@ proc getEscapedChar(c: var PegLexer, tok: var Token) =
proc skip(c: var PegLexer) =
var pos = c.bufpos
var buf = c.buf
while pos < c.buf.len:
case buf[pos]
case c.buf[pos]
of ' ', '\t':
inc(pos)
of '#':
while (pos < c.buf.len) and
not (buf[pos] in {'\c', '\L', '\0'}): inc(pos)
not (c.buf[pos] in {'\c', '\L', '\0'}): inc(pos)
of '\c':
pos = handleCR(c, pos)
buf = c.buf
of '\L':
pos = handleLF(c, pos)
buf = c.buf
else:
break # EndOfFile also leaves the loop
c.bufpos = pos
@@ -1566,10 +1563,9 @@ proc skip(c: var PegLexer) =
proc getString(c: var PegLexer, tok: var Token) =
tok.kind = tkStringLit
var pos = c.bufpos + 1
var buf = c.buf
var quote = buf[pos-1]
var quote = c.buf[pos-1]
while pos < c.buf.len:
case buf[pos]
case c.buf[pos]
of '\\':
c.bufpos = pos
getEscapedChar(c, tok)
@@ -1577,22 +1573,21 @@ proc getString(c: var PegLexer, tok: var Token) =
of '\c', '\L', '\0':
tok.kind = tkInvalid
break
elif buf[pos] == quote:
elif c.buf[pos] == quote:
inc(pos)
break
else:
add(tok.literal, buf[pos])
add(tok.literal, c.buf[pos])
inc(pos)
c.bufpos = pos
proc getDollar(c: var PegLexer, tok: var Token) =
var pos = c.bufpos + 1
var buf = c.buf
if buf[pos] in {'0'..'9'}:
if c.buf[pos] in {'0'..'9'}:
tok.kind = tkBackref
tok.index = 0
while pos < c.buf.len and buf[pos] in {'0'..'9'}:
tok.index = tok.index * 10 + ord(buf[pos]) - ord('0')
while pos < c.buf.len and c.buf[pos] in {'0'..'9'}:
tok.index = tok.index * 10 + ord(c.buf[pos]) - ord('0')
inc(pos)
else:
tok.kind = tkDollar
@@ -1602,14 +1597,13 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
tok.kind = tkCharSet
tok.charset = {}
var pos = c.bufpos + 1
var buf = c.buf
var caret = false
if buf[pos] == '^':
if c.buf[pos] == '^':
inc(pos)
caret = true
while pos < c.buf.len:
var ch: char
case buf[pos]
case c.buf[pos]
of ']':
if pos < c.buf.len: inc(pos)
break
@@ -1622,11 +1616,11 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
tok.kind = tkInvalid
break
else:
ch = buf[pos]
ch = c.buf[pos]
inc(pos)
incl(tok.charset, ch)
if buf[pos] == '-':
if pos+1 < c.buf.len and buf[pos+1] == ']':
if c.buf[pos] == '-':
if pos+1 < c.buf.len and c.buf[pos+1] == ']':
incl(tok.charset, '-')
inc(pos)
else:
@@ -1635,7 +1629,7 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
else:
break
var ch2: char
case buf[pos]
case c.buf[pos]
of '\\':
c.bufpos = pos
getEscapedChar(c, tok)
@@ -1646,7 +1640,7 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
break
else:
if pos+1 < c.buf.len:
ch2 = buf[pos]
ch2 = c.buf[pos]
inc(pos)
else:
break
@@ -1657,11 +1651,10 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
proc getSymbol(c: var PegLexer, tok: var Token) =
var pos = c.bufpos
var buf = c.buf
while pos < c.buf.len:
add(tok.literal, buf[pos])
add(tok.literal, c.buf[pos])
inc(pos)
if pos < buf.len and buf[pos] notin strutils.IdentChars: break
if pos < c.buf.len and c.buf[pos] notin strutils.IdentChars: break
c.bufpos = pos
tok.kind = tkIdentifier
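
The PegLexer changes make peg construction and matching candidates for the VM as well. A hedged sketch:

import pegs

static:
  doAssert match("2019", peg"\d+")
  doAssert not match("year", peg"\d+")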


@@ -53,12 +53,17 @@ type
{.nimcall, raises: [Defect, IOError, OSError], tags: [], gcsafe.}
getPositionImpl*: proc (s: Stream): int
{.nimcall, raises: [Defect, IOError, OSError], tags: [], gcsafe.}
readDataStrImpl*: proc (s: Stream, buffer: var string, slice: Slice[int]): int
{.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.}
readDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int): int
{.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.}
peekDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int): int
{.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.}
writeDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int)
{.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.}
{.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.}
flushImpl*: proc (s: Stream)
{.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.}
@@ -87,6 +92,14 @@ proc readData*(s: Stream, buffer: pointer, bufLen: int): int =
## low level proc that reads data into an untyped `buffer` of `bufLen` size.
result = s.readDataImpl(s, buffer, bufLen)
proc readDataStr*(s: Stream, buffer: var string, slice: Slice[int]): int =
## low level proc that reads data into a string ``buffer`` at ``slice``.
if s.readDataStrImpl != nil:
result = s.readDataStrImpl(s, buffer, slice)
else:
# fallback
result = s.readData(addr buffer[0], buffer.len)
when not defined(js):
proc readAll*(s: Stream): string =
## Reads all available data.
@@ -344,6 +357,19 @@ when not defined(js):
var s = StringStream(s)
return s.pos
proc ssReadDataStr(s: Stream, buffer: var string, slice: Slice[int]): int =
var s = StringStream(s)
result = min(slice.b + 1 - slice.a, s.data.len - s.pos)
if result > 0:
when nimvm:
for i in 0 ..< result: # sorry, but no fast string splicing on the vm.
buffer[slice.a + i] = s.data[s.pos + i]
else:
copyMem(unsafeAddr buffer[slice.a], addr s.data[s.pos], result)
inc(s.pos, result)
else:
result = 0
proc ssReadData(s: Stream, buffer: pointer, bufLen: int): int =
var s = StringStream(s)
result = min(bufLen, s.data.len - s.pos)
@@ -389,6 +415,7 @@ when not defined(js):
result.readDataImpl = ssReadData
result.peekDataImpl = ssPeekData
result.writeDataImpl = ssWriteData
result.readDataStrImpl = ssReadDataStr
type
FileStream* = ref FileStreamObj ## a stream that encapsulates a `File`
@@ -407,6 +434,9 @@ when not defined(js):
proc fsReadData(s: Stream, buffer: pointer, bufLen: int): int =
result = readBuffer(FileStream(s).f, buffer, bufLen)
proc fsReadDataStr(s: Stream, buffer: var string, slice: Slice[int]): int =
result = readBuffer(FileStream(s).f, addr buffer[slice.a], slice.b + 1 - slice.a)
proc fsPeekData(s: Stream, buffer: pointer, bufLen: int): int =
let pos = fsGetPosition(s)
defer: fsSetPosition(s, pos)
@@ -424,6 +454,7 @@ when not defined(js):
result.atEndImpl = fsAtEnd
result.setPositionImpl = fsSetPosition
result.getPositionImpl = fsGetPosition
result.readDataStrImpl = fsReadDataStr
result.readDataImpl = fsReadData
result.peekDataImpl = fsPeekData
result.writeDataImpl = fsWriteData
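
The new readDataStr reads directly into a slice of an existing string buffer, which is how the refill logic above avoids pointer arithmetic in VM-compatible code. A small usage sketch matching the StringStream implementation shown here (names are illustrative):

import streams

var s = newStringStream("0123456789")
var dest = newString(6)
let n = s.readDataStr(dest, 2 .. 5)    # fill dest[2..5] only
doAssert n == 4 and dest[2 .. 5] == "0123"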


@@ -124,9 +124,8 @@ proc handleHexChar(c: char, x: var int): bool =
proc parseString(my: var SexpParser): TTokKind =
result = tkString
var pos = my.bufpos + 1
var buf = my.buf
while true:
case buf[pos]
case my.buf[pos]
of '\0':
my.err = errQuoteExpected
result = tkError
@@ -135,9 +134,9 @@ proc parseString(my: var SexpParser): TTokKind =
inc(pos)
break
of '\\':
case buf[pos+1]
case my.buf[pos+1]
of '\\', '"', '\'', '/':
add(my.a, buf[pos+1])
add(my.a, my.buf[pos+1])
inc(pos, 2)
of 'b':
add(my.a, '\b')
@@ -157,65 +156,61 @@ proc parseString(my: var SexpParser): TTokKind =
of 'u':
inc(pos, 2)
var r: int
if handleHexChar(buf[pos], r): inc(pos)
if handleHexChar(buf[pos], r): inc(pos)
if handleHexChar(buf[pos], r): inc(pos)
if handleHexChar(buf[pos], r): inc(pos)
if handleHexChar(my.buf[pos], r): inc(pos)
if handleHexChar(my.buf[pos], r): inc(pos)
if handleHexChar(my.buf[pos], r): inc(pos)
if handleHexChar(my.buf[pos], r): inc(pos)
add(my.a, toUTF8(Rune(r)))
else:
# don't bother with the error
add(my.a, buf[pos])
add(my.a, my.buf[pos])
inc(pos)
of '\c':
pos = lexbase.handleCR(my, pos)
buf = my.buf
add(my.a, '\c')
of '\L':
pos = lexbase.handleLF(my, pos)
buf = my.buf
add(my.a, '\L')
else:
add(my.a, buf[pos])
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos # store back
proc parseNumber(my: var SexpParser) =
var pos = my.bufpos
var buf = my.buf
if buf[pos] == '-':
if my.buf[pos] == '-':
add(my.a, '-')
inc(pos)
if buf[pos] == '.':
if my.buf[pos] == '.':
add(my.a, "0.")
inc(pos)
else:
while buf[pos] in Digits:
add(my.a, buf[pos])
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
if buf[pos] == '.':
if my.buf[pos] == '.':
add(my.a, '.')
inc(pos)
# digits after the dot:
while buf[pos] in Digits:
add(my.a, buf[pos])
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
if buf[pos] in {'E', 'e'}:
add(my.a, buf[pos])
if my.buf[pos] in {'E', 'e'}:
add(my.a, my.buf[pos])
inc(pos)
if buf[pos] in {'+', '-'}:
add(my.a, buf[pos])
if my.buf[pos] in {'+', '-'}:
add(my.a, my.buf[pos])
inc(pos)
while buf[pos] in Digits:
add(my.a, buf[pos])
while my.buf[pos] in Digits:
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
proc parseSymbol(my: var SexpParser) =
var pos = my.bufpos
var buf = my.buf
if buf[pos] in IdentStartChars:
while buf[pos] in IdentChars:
add(my.a, buf[pos])
if my.buf[pos] in IdentStartChars:
while my.buf[pos] in IdentChars:
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos
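
nimsuggest's sexp parser receives the same treatment. A hedged sketch, assuming the module exposes parseSexp over a string:

import sexp

static:
  discard parseSexp("""(:name "Bongo" :year 1975)""")   # parses inside the VM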


@@ -520,3 +520,43 @@ when true:
block test_tuple:
doAssert $(%* (a1: 10, a2: "foo")) == """{"a1":10,"a2":"foo"}"""
doAssert $(%* (10, "foo")) == """[10,"foo"]"""
# TODO: when the issue with the limited vm registers is solved, the
# exact same test as above should be evaluated at compile time as
# well, to ensure that the vm functionality won't diverge from the
# runtime functionality. Until then, the following test should do it.
static:
var t = parseJson("""
{
"name":"Bongo",
"email":"bongo@bingo.com",
"list": [11,7,15],
"year": 1975,
"dict": {"a": 1, "b": 2},
"arr": [1.0, 2.0, 7.0],
"person": {"name": "boney"},
"dog": {"name": "honey"},
"fruit": {"color": 10},
"distfruit": {"color": 11},
"emails": ["abc", "123"]
}
""")
doAssert t["name"].getStr == "Bongo"
doAssert t["email"].getStr == "bongo@bingo.com"
doAssert t["list"][0].getInt == 11
doAssert t["list"][1].getInt == 7
doAssert t["list"][2].getInt == 15
doAssert t["year"].getInt == 1975
doAssert t["dict"]["a"].getInt == 1
doAssert t["dict"]["b"].getInt == 2
doAssert t["arr"][0].getFloat == 1.0
doAssert t["arr"][1].getFloat == 2.0
doAssert t["arr"][2].getFloat == 7.0
doAssert t["person"]["name"].getStr == "boney"
doAssert t["distfruit"]["color"].getInt == 11
doAssert t["dog"]["name"].getStr == "honey"
doAssert t["fruit"]["color"].getInt == 10
doAssert t["emails"][0].getStr == "abc"
doAssert t["emails"][1].getStr == "123"