mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-28 17:04:41 +00:00
StringStream and parseJson, parseCfg, parseSql et al for the vm (#10746)
This commit is contained in:
committed by
Andreas Rumpf
parent
728ff1004a
commit
1102b8ac6e
@@ -145,38 +145,34 @@ proc atEndMark(buf: cstring, pos: int): bool =
|
||||
|
||||
proc readVerbatimSection(L: var TBaseLexer): Rope =
|
||||
var pos = L.bufpos
|
||||
var buf = L.buf
|
||||
var r = newStringOfCap(30_000)
|
||||
while true:
|
||||
case buf[pos]
|
||||
case L.buf[pos]
|
||||
of CR:
|
||||
pos = nimlexbase.handleCR(L, pos)
|
||||
buf = L.buf
|
||||
r.add('\L')
|
||||
of LF:
|
||||
pos = nimlexbase.handleLF(L, pos)
|
||||
buf = L.buf
|
||||
r.add('\L')
|
||||
of '\0':
|
||||
doAssert(false, "ccgmerge: expected: " & NimMergeEndMark)
|
||||
break
|
||||
else:
|
||||
if atEndMark(buf, pos):
|
||||
if atEndMark(L.buf, pos):
|
||||
inc pos, NimMergeEndMark.len
|
||||
break
|
||||
r.add(buf[pos])
|
||||
r.add(L.buf[pos])
|
||||
inc pos
|
||||
L.bufpos = pos
|
||||
result = r.rope
|
||||
|
||||
proc readKey(L: var TBaseLexer, result: var string) =
|
||||
var pos = L.bufpos
|
||||
var buf = L.buf
|
||||
setLen(result, 0)
|
||||
while buf[pos] in IdentChars:
|
||||
result.add(buf[pos])
|
||||
while L.buf[pos] in IdentChars:
|
||||
result.add(L.buf[pos])
|
||||
inc pos
|
||||
if buf[pos] != ':': doAssert(false, "ccgmerge: ':' expected")
|
||||
if L.buf[pos] != ':': doAssert(false, "ccgmerge: ':' expected")
|
||||
L.bufpos = pos + 1 # skip ':'
|
||||
|
||||
proc newFakeType(id: int): PType =
|
||||
|
||||
@@ -318,17 +318,16 @@ template eatChar(L: var TLexer, t: var TToken) =
|
||||
proc getNumber(L: var TLexer, result: var TToken) =
|
||||
proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]): Natural =
|
||||
var pos = L.bufpos # use registers for pos, buf
|
||||
var buf = L.buf
|
||||
result = 0
|
||||
while true:
|
||||
if buf[pos] in chars:
|
||||
add(tok.literal, buf[pos])
|
||||
if L.buf[pos] in chars:
|
||||
add(tok.literal, L.buf[pos])
|
||||
inc(pos)
|
||||
inc(result)
|
||||
else:
|
||||
break
|
||||
if buf[pos] == '_':
|
||||
if buf[pos+1] notin chars:
|
||||
if L.buf[pos] == '_':
|
||||
if L.buf[pos+1] notin chars:
|
||||
lexMessage(L, errGenerated,
|
||||
"only single underscores may occur in a token and token may not " &
|
||||
"end with an underscore: e.g. '1__1' and '1_' are invalid")
|
||||
@@ -339,9 +338,8 @@ proc getNumber(L: var TLexer, result: var TToken) =
|
||||
|
||||
proc matchChars(L: var TLexer, tok: var TToken, chars: set[char]) =
|
||||
var pos = L.bufpos # use registers for pos, buf
|
||||
var buf = L.buf
|
||||
while buf[pos] in chars:
|
||||
add(tok.literal, buf[pos])
|
||||
while L.buf[pos] in chars:
|
||||
add(tok.literal, L.buf[pos])
|
||||
inc(pos)
|
||||
L.bufpos = pos
|
||||
|
||||
@@ -800,25 +798,23 @@ type
|
||||
|
||||
proc getString(L: var TLexer, tok: var TToken, mode: StringMode) =
|
||||
var pos = L.bufpos
|
||||
var buf = L.buf # put `buf` in a register
|
||||
var line = L.lineNumber # save linenumber for better error message
|
||||
tokenBegin(tok, pos - ord(mode == raw))
|
||||
inc pos # skip "
|
||||
if buf[pos] == '\"' and buf[pos+1] == '\"':
|
||||
if L.buf[pos] == '\"' and L.buf[pos+1] == '\"':
|
||||
tok.tokType = tkTripleStrLit # long string literal:
|
||||
inc(pos, 2) # skip ""
|
||||
# skip leading newline:
|
||||
if buf[pos] in {' ', '\t'}:
|
||||
if L.buf[pos] in {' ', '\t'}:
|
||||
var newpos = pos+1
|
||||
while buf[newpos] in {' ', '\t'}: inc newpos
|
||||
if buf[newpos] in {CR, LF}: pos = newpos
|
||||
while L.buf[newpos] in {' ', '\t'}: inc newpos
|
||||
if L.buf[newpos] in {CR, LF}: pos = newpos
|
||||
pos = handleCRLF(L, pos)
|
||||
buf = L.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
case L.buf[pos]
|
||||
of '\"':
|
||||
if buf[pos+1] == '\"' and buf[pos+2] == '\"' and
|
||||
buf[pos+3] != '\"':
|
||||
if L.buf[pos+1] == '\"' and L.buf[pos+2] == '\"' and
|
||||
L.buf[pos+3] != '\"':
|
||||
tokenEndIgnore(tok, pos+2)
|
||||
L.bufpos = pos + 3 # skip the three """
|
||||
break
|
||||
@@ -827,7 +823,6 @@ proc getString(L: var TLexer, tok: var TToken, mode: StringMode) =
|
||||
of CR, LF:
|
||||
tokenEndIgnore(tok, pos)
|
||||
pos = handleCRLF(L, pos)
|
||||
buf = L.buf
|
||||
add(tok.literal, "\n")
|
||||
of nimlexbase.EndOfFile:
|
||||
tokenEndIgnore(tok, pos)
|
||||
@@ -838,16 +833,16 @@ proc getString(L: var TLexer, tok: var TToken, mode: StringMode) =
|
||||
L.bufpos = pos
|
||||
break
|
||||
else:
|
||||
add(tok.literal, buf[pos])
|
||||
add(tok.literal, L.buf[pos])
|
||||
inc(pos)
|
||||
else:
|
||||
# ordinary string literal
|
||||
if mode != normal: tok.tokType = tkRStrLit
|
||||
else: tok.tokType = tkStrLit
|
||||
while true:
|
||||
var c = buf[pos]
|
||||
var c = L.buf[pos]
|
||||
if c == '\"':
|
||||
if mode != normal and buf[pos+1] == '\"':
|
||||
if mode != normal and L.buf[pos+1] == '\"':
|
||||
inc(pos, 2)
|
||||
add(tok.literal, '"')
|
||||
else:
|
||||
@@ -885,10 +880,9 @@ proc getCharacter(L: var TLexer, tok: var TToken) =
|
||||
proc getSymbol(L: var TLexer, tok: var TToken) =
|
||||
var h: Hash = 0
|
||||
var pos = L.bufpos
|
||||
var buf = L.buf
|
||||
tokenBegin(tok, pos)
|
||||
while true:
|
||||
var c = buf[pos]
|
||||
var c = L.buf[pos]
|
||||
case c
|
||||
of 'a'..'z', '0'..'9', '\x80'..'\xFF':
|
||||
h = h !& ord(c)
|
||||
@@ -898,7 +892,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
|
||||
h = h !& ord(c)
|
||||
inc(pos)
|
||||
of '_':
|
||||
if buf[pos+1] notin SymChars:
|
||||
if L.buf[pos+1] notin SymChars:
|
||||
lexMessage(L, errGenerated, "invalid token: trailing underscore")
|
||||
break
|
||||
inc(pos)
|
||||
@@ -923,11 +917,10 @@ proc endOperator(L: var TLexer, tok: var TToken, pos: int,
|
||||
|
||||
proc getOperator(L: var TLexer, tok: var TToken) =
|
||||
var pos = L.bufpos
|
||||
var buf = L.buf
|
||||
tokenBegin(tok, pos)
|
||||
var h: Hash = 0
|
||||
while true:
|
||||
var c = buf[pos]
|
||||
var c = L.buf[pos]
|
||||
if c notin OpChars: break
|
||||
h = h !& ord(c)
|
||||
inc(pos)
|
||||
@@ -936,10 +929,10 @@ proc getOperator(L: var TLexer, tok: var TToken) =
|
||||
# advance pos but don't store it in L.bufpos so the next token (which might
|
||||
# be an operator too) gets the preceding spaces:
|
||||
tok.strongSpaceB = 0
|
||||
while buf[pos] == ' ':
|
||||
while L.buf[pos] == ' ':
|
||||
inc pos
|
||||
inc tok.strongSpaceB
|
||||
if buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
|
||||
if L.buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
|
||||
tok.strongSpaceB = -1
|
||||
|
||||
proc getPrecedence*(tok: TToken, strongSpaces: bool): int =
|
||||
@@ -980,9 +973,8 @@ proc getPrecedence*(tok: TToken, strongSpaces: bool): int =
|
||||
|
||||
proc newlineFollows*(L: TLexer): bool =
|
||||
var pos = L.bufpos
|
||||
var buf = L.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
case L.buf[pos]
|
||||
of ' ', '\t':
|
||||
inc(pos)
|
||||
of CR, LF:
|
||||
@@ -990,49 +982,47 @@ proc newlineFollows*(L: TLexer): bool =
|
||||
break
|
||||
of '#':
|
||||
inc(pos)
|
||||
if buf[pos] == '#': inc(pos)
|
||||
if buf[pos] != '[': return true
|
||||
if L.buf[pos] == '#': inc(pos)
|
||||
if L.buf[pos] != '[': return true
|
||||
else:
|
||||
break
|
||||
|
||||
proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
|
||||
isDoc: bool) =
|
||||
var pos = start
|
||||
var buf = L.buf
|
||||
var toStrip = 0
|
||||
tokenBegin(tok, pos)
|
||||
# detect the amount of indentation:
|
||||
if isDoc:
|
||||
toStrip = getColNumber(L, pos)
|
||||
while buf[pos] == ' ': inc pos
|
||||
if buf[pos] in {CR, LF}:
|
||||
while L.buf[pos] == ' ': inc pos
|
||||
if L.buf[pos] in {CR, LF}:
|
||||
pos = handleCRLF(L, pos)
|
||||
buf = L.buf
|
||||
toStrip = 0
|
||||
while buf[pos] == ' ':
|
||||
while L.buf[pos] == ' ':
|
||||
inc pos
|
||||
inc toStrip
|
||||
var nesting = 0
|
||||
while true:
|
||||
case buf[pos]
|
||||
case L.buf[pos]
|
||||
of '#':
|
||||
if isDoc:
|
||||
if buf[pos+1] == '#' and buf[pos+2] == '[':
|
||||
if L.buf[pos+1] == '#' and L.buf[pos+2] == '[':
|
||||
inc nesting
|
||||
tok.literal.add '#'
|
||||
elif buf[pos+1] == '[':
|
||||
elif L.buf[pos+1] == '[':
|
||||
inc nesting
|
||||
inc pos
|
||||
of ']':
|
||||
if isDoc:
|
||||
if buf[pos+1] == '#' and buf[pos+2] == '#':
|
||||
if L.buf[pos+1] == '#' and L.buf[pos+2] == '#':
|
||||
if nesting == 0:
|
||||
tokenEndIgnore(tok, pos+2)
|
||||
inc(pos, 3)
|
||||
break
|
||||
dec nesting
|
||||
tok.literal.add ']'
|
||||
elif buf[pos+1] == '#':
|
||||
elif L.buf[pos+1] == '#':
|
||||
if nesting == 0:
|
||||
tokenEndIgnore(tok, pos+1)
|
||||
inc(pos, 2)
|
||||
@@ -1042,14 +1032,13 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
|
||||
of CR, LF:
|
||||
tokenEndIgnore(tok, pos)
|
||||
pos = handleCRLF(L, pos)
|
||||
buf = L.buf
|
||||
# strip leading whitespace:
|
||||
when defined(nimpretty): tok.literal.add "\L"
|
||||
if isDoc:
|
||||
when not defined(nimpretty): tok.literal.add "\n"
|
||||
inc tok.iNumber
|
||||
var c = toStrip
|
||||
while buf[pos] == ' ' and c > 0:
|
||||
while L.buf[pos] == ' ' and c > 0:
|
||||
inc pos
|
||||
dec c
|
||||
of nimlexbase.EndOfFile:
|
||||
@@ -1057,7 +1046,7 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
|
||||
lexMessagePos(L, errGenerated, pos, "end of multiline comment expected")
|
||||
break
|
||||
else:
|
||||
if isDoc or defined(nimpretty): tok.literal.add buf[pos]
|
||||
if isDoc or defined(nimpretty): tok.literal.add L.buf[pos]
|
||||
inc(pos)
|
||||
L.bufpos = pos
|
||||
when defined(nimpretty):
|
||||
@@ -1065,49 +1054,47 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
|
||||
|
||||
proc scanComment(L: var TLexer, tok: var TToken) =
|
||||
var pos = L.bufpos
|
||||
var buf = L.buf
|
||||
tok.tokType = tkComment
|
||||
# iNumber contains the number of '\n' in the token
|
||||
tok.iNumber = 0
|
||||
assert buf[pos+1] == '#'
|
||||
assert L.buf[pos+1] == '#'
|
||||
when defined(nimpretty):
|
||||
tok.commentOffsetA = L.offsetBase + pos - 1
|
||||
|
||||
if buf[pos+2] == '[':
|
||||
if L.buf[pos+2] == '[':
|
||||
skipMultiLineComment(L, tok, pos+3, true)
|
||||
return
|
||||
tokenBegin(tok, pos)
|
||||
inc(pos, 2)
|
||||
|
||||
var toStrip = 0
|
||||
while buf[pos] == ' ':
|
||||
while L.buf[pos] == ' ':
|
||||
inc pos
|
||||
inc toStrip
|
||||
|
||||
while true:
|
||||
var lastBackslash = -1
|
||||
while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
|
||||
if buf[pos] == '\\': lastBackslash = pos+1
|
||||
add(tok.literal, buf[pos])
|
||||
while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
|
||||
if L.buf[pos] == '\\': lastBackslash = pos+1
|
||||
add(tok.literal, L.buf[pos])
|
||||
inc(pos)
|
||||
tokenEndIgnore(tok, pos)
|
||||
pos = handleCRLF(L, pos)
|
||||
buf = L.buf
|
||||
var indent = 0
|
||||
while buf[pos] == ' ':
|
||||
while L.buf[pos] == ' ':
|
||||
inc(pos)
|
||||
inc(indent)
|
||||
|
||||
if buf[pos] == '#' and buf[pos+1] == '#':
|
||||
if L.buf[pos] == '#' and L.buf[pos+1] == '#':
|
||||
tok.literal.add "\n"
|
||||
inc(pos, 2)
|
||||
var c = toStrip
|
||||
while buf[pos] == ' ' and c > 0:
|
||||
while L.buf[pos] == ' ' and c > 0:
|
||||
inc pos
|
||||
dec c
|
||||
inc tok.iNumber
|
||||
else:
|
||||
if buf[pos] > ' ':
|
||||
if L.buf[pos] > ' ':
|
||||
L.indentAhead = indent
|
||||
tokenEndIgnore(tok, pos)
|
||||
break
|
||||
@@ -1117,7 +1104,6 @@ proc scanComment(L: var TLexer, tok: var TToken) =
|
||||
|
||||
proc skip(L: var TLexer, tok: var TToken) =
|
||||
var pos = L.bufpos
|
||||
var buf = L.buf
|
||||
tokenBegin(tok, pos)
|
||||
tok.strongSpaceA = 0
|
||||
when defined(nimpretty):
|
||||
@@ -1127,7 +1113,7 @@ proc skip(L: var TLexer, tok: var TToken) =
|
||||
tok.commentOffsetB = tok.commentOffsetA
|
||||
tok.line = -1
|
||||
while true:
|
||||
case buf[pos]
|
||||
case L.buf[pos]
|
||||
of ' ':
|
||||
inc(pos)
|
||||
inc(tok.strongSpaceA)
|
||||
@@ -1137,13 +1123,12 @@ proc skip(L: var TLexer, tok: var TToken) =
|
||||
of CR, LF:
|
||||
tokenEndPrevious(tok, pos)
|
||||
pos = handleCRLF(L, pos)
|
||||
buf = L.buf
|
||||
var indent = 0
|
||||
while true:
|
||||
if buf[pos] == ' ':
|
||||
if L.buf[pos] == ' ':
|
||||
inc(pos)
|
||||
inc(indent)
|
||||
elif buf[pos] == '#' and buf[pos+1] == '[':
|
||||
elif L.buf[pos] == '#' and L.buf[pos+1] == '[':
|
||||
when defined(nimpretty):
|
||||
hasComment = true
|
||||
if tok.line < 0:
|
||||
@@ -1151,32 +1136,30 @@ proc skip(L: var TLexer, tok: var TToken) =
|
||||
commentIndent = indent
|
||||
skipMultiLineComment(L, tok, pos+2, false)
|
||||
pos = L.bufpos
|
||||
buf = L.buf
|
||||
else:
|
||||
break
|
||||
tok.strongSpaceA = 0
|
||||
when defined(nimpretty):
|
||||
if buf[pos] == '#' and tok.line < 0: commentIndent = indent
|
||||
if buf[pos] > ' ' and (buf[pos] != '#' or buf[pos+1] == '#'):
|
||||
if L.buf[pos] == '#' and tok.line < 0: commentIndent = indent
|
||||
if L.buf[pos] > ' ' and (L.buf[pos] != '#' or L.buf[pos+1] == '#'):
|
||||
tok.indent = indent
|
||||
L.currLineIndent = indent
|
||||
break
|
||||
of '#':
|
||||
# do not skip documentation comment:
|
||||
if buf[pos+1] == '#': break
|
||||
if L.buf[pos+1] == '#': break
|
||||
when defined(nimpretty):
|
||||
hasComment = true
|
||||
if tok.line < 0:
|
||||
tok.line = L.lineNumber
|
||||
|
||||
if buf[pos+1] == '[':
|
||||
if L.buf[pos+1] == '[':
|
||||
skipMultiLineComment(L, tok, pos+2, false)
|
||||
pos = L.bufpos
|
||||
buf = L.buf
|
||||
else:
|
||||
tokenBegin(tok, pos)
|
||||
while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
|
||||
when defined(nimpretty): tok.literal.add buf[pos]
|
||||
while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
|
||||
when defined(nimpretty): tok.literal.add L.buf[pos]
|
||||
inc(pos)
|
||||
tokenEndIgnore(tok, pos+1)
|
||||
when defined(nimpretty):
|
||||
|
||||
@@ -39,8 +39,7 @@ const
|
||||
type
|
||||
TBaseLexer* = object of RootObj
|
||||
bufpos*: int
|
||||
buf*: cstring
|
||||
bufLen*: int # length of buffer in characters
|
||||
buf*: string
|
||||
stream*: PLLStream # we read from this stream
|
||||
lineNumber*: int # the current line number
|
||||
# private data:
|
||||
@@ -65,11 +64,7 @@ proc handleLF*(L: var TBaseLexer, pos: int): int
|
||||
# of the LF.
|
||||
# implementation
|
||||
|
||||
const
|
||||
chrSize = sizeof(char)
|
||||
|
||||
proc closeBaseLexer(L: var TBaseLexer) =
|
||||
dealloc(L.buf)
|
||||
llStreamClose(L.stream)
|
||||
|
||||
proc fillBuffer(L: var TBaseLexer) =
|
||||
@@ -80,14 +75,13 @@ proc fillBuffer(L: var TBaseLexer) =
|
||||
oldBufLen: int
|
||||
# we know here that pos == L.sentinel, but not if this proc
|
||||
# is called the first time by initBaseLexer()
|
||||
assert(L.sentinel < L.bufLen)
|
||||
toCopy = L.bufLen - L.sentinel - 1
|
||||
assert(L.sentinel < L.buf.len)
|
||||
toCopy = L.buf.len - L.sentinel - 1
|
||||
assert(toCopy >= 0)
|
||||
if toCopy > 0:
|
||||
moveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize)
|
||||
moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy)
|
||||
# "moveMem" handles overlapping regions
|
||||
charsRead = llStreamRead(L.stream, addr(L.buf[toCopy]),
|
||||
(L.sentinel + 1) * chrSize) div chrSize
|
||||
charsRead = llStreamRead(L.stream, addr L.buf[toCopy], L.sentinel + 1)
|
||||
s = toCopy + charsRead
|
||||
if charsRead < L.sentinel + 1:
|
||||
L.buf[s] = EndOfFile # set end marker
|
||||
@@ -96,7 +90,7 @@ proc fillBuffer(L: var TBaseLexer) =
|
||||
# compute sentinel:
|
||||
dec(s) # BUGFIX (valgrind)
|
||||
while true:
|
||||
assert(s < L.bufLen)
|
||||
assert(s < L.buf.len)
|
||||
while (s >= 0) and not (L.buf[s] in NewLines): dec(s)
|
||||
if s >= 0:
|
||||
# we found an appropriate character for a sentinel:
|
||||
@@ -105,17 +99,16 @@ proc fillBuffer(L: var TBaseLexer) =
|
||||
else:
|
||||
# rather than to give up here because the line is too long,
|
||||
# double the buffer's size and try again:
|
||||
oldBufLen = L.bufLen
|
||||
L.bufLen = L.bufLen * 2
|
||||
L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize))
|
||||
assert(L.bufLen - oldBufLen == oldBufLen)
|
||||
oldBufLen = L.buf.len
|
||||
L.buf.setLen(L.buf.len * 2)
|
||||
assert(L.buf.len - oldBufLen == oldBufLen)
|
||||
charsRead = llStreamRead(L.stream, addr(L.buf[oldBufLen]),
|
||||
oldBufLen * chrSize) div chrSize
|
||||
oldBufLen)
|
||||
if charsRead < oldBufLen:
|
||||
L.buf[oldBufLen + charsRead] = EndOfFile
|
||||
L.sentinel = oldBufLen + charsRead
|
||||
break
|
||||
s = L.bufLen - 1
|
||||
s = L.buf.len - 1
|
||||
|
||||
proc fillBaseLexer(L: var TBaseLexer, pos: int): int =
|
||||
assert(pos <= L.sentinel)
|
||||
@@ -149,8 +142,7 @@ proc openBaseLexer(L: var TBaseLexer, inputstream: PLLStream, bufLen = 8192) =
|
||||
assert(bufLen > 0)
|
||||
L.bufpos = 0
|
||||
L.offsetBase = 0
|
||||
L.bufLen = bufLen
|
||||
L.buf = cast[cstring](alloc(bufLen * chrSize))
|
||||
L.buf = newString(bufLen)
|
||||
L.sentinel = bufLen - 1
|
||||
L.lineStart = 0
|
||||
L.lineNumber = 1 # lines start at 1
|
||||
|
||||
@@ -2039,6 +2039,8 @@ proc gen(c: PCtx; n: PNode; dest: var TDest; flags: TGenFlags = {}) =
|
||||
genConv(c, n, n.sons[1], dest)
|
||||
of nkObjDownConv:
|
||||
genConv(c, n, n.sons[0], dest)
|
||||
of nkObjUpConv:
|
||||
genConv(c, n, n.sons[0], dest)
|
||||
of nkVarSection, nkLetSection:
|
||||
unused(c, n, dest)
|
||||
genVarSection(c, n)
|
||||
|
||||
@@ -155,18 +155,17 @@ proc getAdornment(L: var Lexer, tok: var Token) =
|
||||
|
||||
proc getIndentAux(L: var Lexer, start: int): int =
|
||||
var pos = start
|
||||
var buf = L.buf
|
||||
# skip the newline (but include it in the token!)
|
||||
if buf[pos] == '\x0D':
|
||||
if buf[pos + 1] == '\x0A': inc(pos, 2)
|
||||
if L.buf[pos] == '\x0D':
|
||||
if L.buf[pos + 1] == '\x0A': inc(pos, 2)
|
||||
else: inc(pos)
|
||||
elif buf[pos] == '\x0A':
|
||||
elif L.buf[pos] == '\x0A':
|
||||
inc(pos)
|
||||
if L.skipPounds:
|
||||
if buf[pos] == '#': inc(pos)
|
||||
if buf[pos] == '#': inc(pos)
|
||||
if L.buf[pos] == '#': inc(pos)
|
||||
if L.buf[pos] == '#': inc(pos)
|
||||
while true:
|
||||
case buf[pos]
|
||||
case L.buf[pos]
|
||||
of ' ', '\x0B', '\x0C':
|
||||
inc(pos)
|
||||
inc(result)
|
||||
@@ -175,9 +174,9 @@ proc getIndentAux(L: var Lexer, start: int): int =
|
||||
result = result - (result mod 8) + 8
|
||||
else:
|
||||
break # EndOfFile also leaves the loop
|
||||
if buf[pos] == '\0':
|
||||
if L.buf[pos] == '\0':
|
||||
result = 0
|
||||
elif (buf[pos] == '\x0A') or (buf[pos] == '\x0D'):
|
||||
elif (L.buf[pos] == '\x0A') or (L.buf[pos] == '\x0D'):
|
||||
# look at the next line for proper indentation:
|
||||
result = getIndentAux(L, pos)
|
||||
L.bufpos = pos # no need to set back buf
|
||||
|
||||
@@ -28,11 +28,7 @@ type
|
||||
BaseLexer* = object of RootObj ## the base lexer. Inherit your lexer from
|
||||
## this object.
|
||||
bufpos*: int ## the current position within the buffer
|
||||
when defined(js): ## the buffer itself
|
||||
buf*: string
|
||||
else:
|
||||
buf*: cstring
|
||||
bufLen*: int ## length of buffer in characters
|
||||
buf*: string ## the buffer itself
|
||||
input: Stream ## the input stream
|
||||
lineNumber*: int ## the current line number
|
||||
sentinel: int
|
||||
@@ -40,13 +36,8 @@ type
|
||||
offsetBase*: int # use ``offsetBase + bufpos`` to get the offset
|
||||
refillChars: set[char]
|
||||
|
||||
const
|
||||
chrSize = sizeof(char)
|
||||
|
||||
proc close*(L: var BaseLexer) =
|
||||
## closes the base lexer. This closes `L`'s associated stream too.
|
||||
when not defined(js):
|
||||
dealloc(L.buf)
|
||||
close(L.input)
|
||||
|
||||
proc fillBuffer(L: var BaseLexer) =
|
||||
@@ -57,17 +48,21 @@ proc fillBuffer(L: var BaseLexer) =
|
||||
oldBufLen: int
|
||||
# we know here that pos == L.sentinel, but not if this proc
|
||||
# is called the first time by initBaseLexer()
|
||||
assert(L.sentinel < L.bufLen)
|
||||
toCopy = L.bufLen - L.sentinel - 1
|
||||
assert(L.sentinel + 1 <= L.buf.len)
|
||||
toCopy = L.buf.len - (L.sentinel + 1)
|
||||
assert(toCopy >= 0)
|
||||
if toCopy > 0:
|
||||
when defined(js):
|
||||
for i in 0 ..< toCopy: L.buf[i] = L.buf[L.sentinel + 1 + i]
|
||||
for i in 0 ..< toCopy:
|
||||
L.buf[i] = L.buf[L.sentinel + 1 + i]
|
||||
else:
|
||||
# "moveMem" handles overlapping regions
|
||||
moveMem(L.buf, addr L.buf[L.sentinel + 1], toCopy * chrSize)
|
||||
charsRead = readData(L.input, addr(L.buf[toCopy]),
|
||||
(L.sentinel + 1) * chrSize) div chrSize
|
||||
when nimvm:
|
||||
for i in 0 ..< toCopy:
|
||||
L.buf[i] = L.buf[L.sentinel + 1 + i]
|
||||
else:
|
||||
# "moveMem" handles overlapping regions
|
||||
moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy)
|
||||
charsRead = L.input.readDataStr(L.buf, toCopy ..< toCopy + L.sentinel + 1)
|
||||
s = toCopy + charsRead
|
||||
if charsRead < L.sentinel + 1:
|
||||
L.buf[s] = EndOfFile # set end marker
|
||||
@@ -76,7 +71,7 @@ proc fillBuffer(L: var BaseLexer) =
|
||||
# compute sentinel:
|
||||
dec(s) # BUGFIX (valgrind)
|
||||
while true:
|
||||
assert(s < L.bufLen)
|
||||
assert(s < L.buf.len)
|
||||
while s >= 0 and L.buf[s] notin L.refillChars: dec(s)
|
||||
if s >= 0:
|
||||
# we found an appropriate character for a sentinel:
|
||||
@@ -85,20 +80,14 @@ proc fillBuffer(L: var BaseLexer) =
|
||||
else:
|
||||
# rather than to give up here because the line is too long,
|
||||
# double the buffer's size and try again:
|
||||
oldBufLen = L.bufLen
|
||||
L.bufLen = L.bufLen * 2
|
||||
when defined(js):
|
||||
L.buf.setLen(L.bufLen)
|
||||
else:
|
||||
L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize))
|
||||
assert(L.bufLen - oldBufLen == oldBufLen)
|
||||
charsRead = readData(L.input, addr(L.buf[oldBufLen]),
|
||||
oldBufLen * chrSize) div chrSize
|
||||
oldBufLen = L.buf.len
|
||||
L.buf.setLen(L.buf.len * 2)
|
||||
charsRead = readDataStr(L.input, L.buf, oldBufLen ..< L.buf.len)
|
||||
if charsRead < oldBufLen:
|
||||
L.buf[oldBufLen + charsRead] = EndOfFile
|
||||
L.sentinel = oldBufLen + charsRead
|
||||
break
|
||||
s = L.bufLen - 1
|
||||
s = L.buf.len - 1
|
||||
|
||||
proc fillBaseLexer(L: var BaseLexer, pos: int): int =
|
||||
assert(pos <= L.sentinel)
|
||||
@@ -148,12 +137,8 @@ proc open*(L: var BaseLexer, input: Stream, bufLen: int = 8192;
|
||||
L.input = input
|
||||
L.bufpos = 0
|
||||
L.offsetBase = 0
|
||||
L.bufLen = bufLen
|
||||
L.refillChars = refillChars
|
||||
when defined(js):
|
||||
L.buf = newString(bufLen)
|
||||
else:
|
||||
L.buf = cast[cstring](alloc(bufLen * chrSize))
|
||||
L.buf = newString(bufLen)
|
||||
L.sentinel = bufLen - 1
|
||||
L.lineStart = 0
|
||||
L.lineNumber = 1 # lines start at 1
|
||||
|
||||
@@ -261,35 +261,32 @@ proc handleCRLF(c: var CfgParser, pos: int): int =
|
||||
|
||||
proc getString(c: var CfgParser, tok: var Token, rawMode: bool) =
|
||||
var pos = c.bufpos + 1 # skip "
|
||||
var buf = c.buf # put `buf` in a register
|
||||
tok.kind = tkSymbol
|
||||
if (buf[pos] == '"') and (buf[pos + 1] == '"'):
|
||||
if (c.buf[pos] == '"') and (c.buf[pos + 1] == '"'):
|
||||
# long string literal:
|
||||
inc(pos, 2) # skip ""
|
||||
# skip leading newline:
|
||||
pos = handleCRLF(c, pos)
|
||||
buf = c.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
case c.buf[pos]
|
||||
of '"':
|
||||
if (buf[pos + 1] == '"') and (buf[pos + 2] == '"'): break
|
||||
if (c.buf[pos + 1] == '"') and (c.buf[pos + 2] == '"'): break
|
||||
add(tok.literal, '"')
|
||||
inc(pos)
|
||||
of '\c', '\L':
|
||||
pos = handleCRLF(c, pos)
|
||||
buf = c.buf
|
||||
add(tok.literal, "\n")
|
||||
of lexbase.EndOfFile:
|
||||
tok.kind = tkInvalid
|
||||
break
|
||||
else:
|
||||
add(tok.literal, buf[pos])
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
c.bufpos = pos + 3 # skip the three """
|
||||
else:
|
||||
# ordinary string literal
|
||||
while true:
|
||||
var ch = buf[pos]
|
||||
var ch = c.buf[pos]
|
||||
if ch == '"':
|
||||
inc(pos) # skip '"'
|
||||
break
|
||||
@@ -307,26 +304,23 @@ proc getString(c: var CfgParser, tok: var Token, rawMode: bool) =
|
||||
|
||||
proc getSymbol(c: var CfgParser, tok: var Token) =
|
||||
var pos = c.bufpos
|
||||
var buf = c.buf
|
||||
while true:
|
||||
add(tok.literal, buf[pos])
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
if not (buf[pos] in SymChars): break
|
||||
if not (c.buf[pos] in SymChars): break
|
||||
c.bufpos = pos
|
||||
tok.kind = tkSymbol
|
||||
|
||||
proc skip(c: var CfgParser) =
|
||||
var pos = c.bufpos
|
||||
var buf = c.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
case c.buf[pos]
|
||||
of ' ', '\t':
|
||||
inc(pos)
|
||||
of '#', ';':
|
||||
while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos)
|
||||
while not (c.buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos)
|
||||
of '\c', '\L':
|
||||
pos = handleCRLF(c, pos)
|
||||
buf = c.buf
|
||||
else:
|
||||
break # EndOfFile also leaves the loop
|
||||
c.bufpos = pos
|
||||
|
||||
@@ -156,44 +156,41 @@ proc open*(my: var CsvParser, filename: string,
|
||||
|
||||
proc parseField(my: var CsvParser, a: var string) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if my.skipWhite:
|
||||
while buf[pos] in {' ', '\t'}: inc(pos)
|
||||
while my.buf[pos] in {' ', '\t'}: inc(pos)
|
||||
setLen(a, 0) # reuse memory
|
||||
if buf[pos] == my.quote and my.quote != '\0':
|
||||
if my.buf[pos] == my.quote and my.quote != '\0':
|
||||
inc(pos)
|
||||
while true:
|
||||
let c = buf[pos]
|
||||
let c = my.buf[pos]
|
||||
if c == '\0':
|
||||
my.bufpos = pos # can continue after exception?
|
||||
error(my, pos, my.quote & " expected")
|
||||
break
|
||||
elif c == my.quote:
|
||||
if my.esc == '\0' and buf[pos+1] == my.quote:
|
||||
if my.esc == '\0' and my.buf[pos+1] == my.quote:
|
||||
add(a, my.quote)
|
||||
inc(pos, 2)
|
||||
else:
|
||||
inc(pos)
|
||||
break
|
||||
elif c == my.esc:
|
||||
add(a, buf[pos+1])
|
||||
add(a, my.buf[pos+1])
|
||||
inc(pos, 2)
|
||||
else:
|
||||
case c
|
||||
of '\c':
|
||||
pos = handleCR(my, pos)
|
||||
buf = my.buf
|
||||
add(a, "\n")
|
||||
of '\l':
|
||||
pos = handleLF(my, pos)
|
||||
buf = my.buf
|
||||
add(a, "\n")
|
||||
else:
|
||||
add(a, c)
|
||||
inc(pos)
|
||||
else:
|
||||
while true:
|
||||
let c = buf[pos]
|
||||
let c = my.buf[pos]
|
||||
if c == my.sep: break
|
||||
if c in {'\c', '\l', '\0'}: break
|
||||
add(a, c)
|
||||
|
||||
@@ -182,11 +182,10 @@ proc parseEscapedUTF16*(buf: cstring, pos: var int): int =
|
||||
proc parseString(my: var JsonParser): TokKind =
|
||||
result = tkString
|
||||
var pos = my.bufpos + 1
|
||||
var buf = my.buf
|
||||
if my.rawStringLiterals:
|
||||
add(my.a, '"')
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '\0':
|
||||
my.err = errQuoteExpected
|
||||
result = tkError
|
||||
@@ -199,9 +198,9 @@ proc parseString(my: var JsonParser): TokKind =
|
||||
of '\\':
|
||||
if my.rawStringLiterals:
|
||||
add(my.a, '\\')
|
||||
case buf[pos+1]
|
||||
case my.buf[pos+1]
|
||||
of '\\', '"', '\'', '/':
|
||||
add(my.a, buf[pos+1])
|
||||
add(my.a, my.buf[pos+1])
|
||||
inc(pos, 2)
|
||||
of 'b':
|
||||
add(my.a, '\b')
|
||||
@@ -223,17 +222,17 @@ proc parseString(my: var JsonParser): TokKind =
|
||||
add(my.a, 'u')
|
||||
inc(pos, 2)
|
||||
var pos2 = pos
|
||||
var r = parseEscapedUTF16(buf, pos)
|
||||
var r = parseEscapedUTF16(my.buf, pos)
|
||||
if r < 0:
|
||||
my.err = errInvalidToken
|
||||
break
|
||||
# Deal with surrogates
|
||||
if (r and 0xfc00) == 0xd800:
|
||||
if buf[pos] != '\\' or buf[pos+1] != 'u':
|
||||
if my.buf[pos] != '\\' or my.buf[pos+1] != 'u':
|
||||
my.err = errInvalidToken
|
||||
break
|
||||
inc(pos, 2)
|
||||
var s = parseEscapedUTF16(buf, pos)
|
||||
var s = parseEscapedUTF16(my.buf, pos)
|
||||
if (s and 0xfc00) == 0xdc00 and s > 0:
|
||||
r = 0x10000 + (((r - 0xd800) shl 10) or (s - 0xdc00))
|
||||
else:
|
||||
@@ -242,8 +241,8 @@ proc parseString(my: var JsonParser): TokKind =
|
||||
if my.rawStringLiterals:
|
||||
let length = pos - pos2
|
||||
for i in 1 .. length:
|
||||
if buf[pos2] in {'0'..'9', 'A'..'F', 'a'..'f'}:
|
||||
add(my.a, buf[pos2])
|
||||
if my.buf[pos2] in {'0'..'9', 'A'..'F', 'a'..'f'}:
|
||||
add(my.a, my.buf[pos2])
|
||||
inc pos2
|
||||
else:
|
||||
break
|
||||
@@ -251,61 +250,54 @@ proc parseString(my: var JsonParser): TokKind =
|
||||
add(my.a, toUTF8(Rune(r)))
|
||||
else:
|
||||
# don't bother with the error
|
||||
add(my.a, buf[pos])
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\c')
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\L')
|
||||
else:
|
||||
add(my.a, buf[pos])
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos # store back
|
||||
|
||||
proc skip(my: var JsonParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '/':
|
||||
if buf[pos+1] == '/':
|
||||
if my.buf[pos+1] == '/':
|
||||
# skip line comment:
|
||||
inc(pos, 2)
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '\0':
|
||||
break
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
break
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
break
|
||||
else:
|
||||
inc(pos)
|
||||
elif buf[pos+1] == '*':
|
||||
elif my.buf[pos+1] == '*':
|
||||
# skip long comment:
|
||||
inc(pos, 2)
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '\0':
|
||||
my.err = errEOC_Expected
|
||||
break
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
of '*':
|
||||
inc(pos)
|
||||
if buf[pos] == '/':
|
||||
if my.buf[pos] == '/':
|
||||
inc(pos)
|
||||
break
|
||||
else:
|
||||
@@ -316,51 +308,47 @@ proc skip(my: var JsonParser) =
|
||||
inc(pos)
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
else:
|
||||
break
|
||||
my.bufpos = pos
|
||||
|
||||
proc parseNumber(my: var JsonParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if buf[pos] == '-':
|
||||
if my.buf[pos] == '-':
|
||||
add(my.a, '-')
|
||||
inc(pos)
|
||||
if buf[pos] == '.':
|
||||
if my.buf[pos] == '.':
|
||||
add(my.a, "0.")
|
||||
inc(pos)
|
||||
else:
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
while my.buf[pos] in Digits:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] == '.':
|
||||
if my.buf[pos] == '.':
|
||||
add(my.a, '.')
|
||||
inc(pos)
|
||||
# digits after the dot:
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
while my.buf[pos] in Digits:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] in {'E', 'e'}:
|
||||
add(my.a, buf[pos])
|
||||
if my.buf[pos] in {'E', 'e'}:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] in {'+', '-'}:
|
||||
add(my.a, buf[pos])
|
||||
if my.buf[pos] in {'+', '-'}:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
while my.buf[pos] in Digits:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
|
||||
proc parseName(my: var JsonParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if buf[pos] in IdentStartChars:
|
||||
while buf[pos] in IdentChars:
|
||||
add(my.a, buf[pos])
|
||||
if my.buf[pos] in IdentStartChars:
|
||||
while my.buf[pos] in IdentChars:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
|
||||
|
||||
@@ -148,35 +148,33 @@ proc handleCRLF(c: var SqlLexer, pos: int): int =
|
||||
|
||||
proc skip(c: var SqlLexer) =
|
||||
var pos = c.bufpos
|
||||
var buf = c.buf
|
||||
var nested = 0
|
||||
while true:
|
||||
case buf[pos]
|
||||
case c.buf[pos]
|
||||
of ' ', '\t':
|
||||
inc(pos)
|
||||
of '-':
|
||||
if buf[pos+1] == '-':
|
||||
while not (buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos)
|
||||
if c.buf[pos+1] == '-':
|
||||
while not (c.buf[pos] in {'\c', '\L', lexbase.EndOfFile}): inc(pos)
|
||||
else:
|
||||
break
|
||||
of '/':
|
||||
if buf[pos+1] == '*':
|
||||
if c.buf[pos+1] == '*':
|
||||
inc(pos,2)
|
||||
while true:
|
||||
case buf[pos]
|
||||
case c.buf[pos]
|
||||
of '\0': break
|
||||
of '\c', '\L':
|
||||
pos = handleCRLF(c, pos)
|
||||
buf = c.buf
|
||||
of '*':
|
||||
if buf[pos+1] == '/':
|
||||
if c.buf[pos+1] == '/':
|
||||
inc(pos, 2)
|
||||
if nested <= 0: break
|
||||
dec(nested)
|
||||
else:
|
||||
inc(pos)
|
||||
of '/':
|
||||
if buf[pos+1] == '*':
|
||||
if c.buf[pos+1] == '*':
|
||||
inc(pos, 2)
|
||||
inc(nested)
|
||||
else:
|
||||
@@ -185,21 +183,19 @@ proc skip(c: var SqlLexer) =
|
||||
else: break
|
||||
of '\c', '\L':
|
||||
pos = handleCRLF(c, pos)
|
||||
buf = c.buf
|
||||
else:
|
||||
break # EndOfFile also leaves the loop
|
||||
c.bufpos = pos
|
||||
|
||||
proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) =
|
||||
var pos = c.bufpos + 1
|
||||
var buf = c.buf
|
||||
tok.kind = kind
|
||||
block parseLoop:
|
||||
while true:
|
||||
while true:
|
||||
var ch = buf[pos]
|
||||
var ch = c.buf[pos]
|
||||
if ch == '\'':
|
||||
if buf[pos+1] == '\'':
|
||||
if c.buf[pos+1] == '\'':
|
||||
inc(pos, 2)
|
||||
add(tok.literal, '\'')
|
||||
else:
|
||||
@@ -221,30 +217,27 @@ proc getString(c: var SqlLexer, tok: var Token, kind: TokKind) =
|
||||
if c.lineNumber > line:
|
||||
# a new line whitespace has been parsed, so we check if the string
|
||||
# continues after the whitespace:
|
||||
buf = c.buf # may have been reallocated
|
||||
pos = c.bufpos
|
||||
if buf[pos] == '\'': inc(pos)
|
||||
if c.buf[pos] == '\'': inc(pos)
|
||||
else: break parseLoop
|
||||
else: break parseLoop
|
||||
c.bufpos = pos
|
||||
|
||||
proc getDollarString(c: var SqlLexer, tok: var Token) =
|
||||
var pos = c.bufpos + 1
|
||||
var buf = c.buf
|
||||
tok.kind = tkDollarQuotedConstant
|
||||
var tag = "$"
|
||||
while buf[pos] in IdentChars:
|
||||
add(tag, buf[pos])
|
||||
while c.buf[pos] in IdentChars:
|
||||
add(tag, c.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] == '$': inc(pos)
|
||||
if c.buf[pos] == '$': inc(pos)
|
||||
else:
|
||||
tok.kind = tkInvalid
|
||||
return
|
||||
while true:
|
||||
case buf[pos]
|
||||
case c.buf[pos]
|
||||
of '\c', '\L':
|
||||
pos = handleCRLF(c, pos)
|
||||
buf = c.buf
|
||||
add(tok.literal, "\L")
|
||||
of '\0':
|
||||
tok.kind = tkInvalid
|
||||
@@ -252,37 +245,35 @@ proc getDollarString(c: var SqlLexer, tok: var Token) =
|
||||
of '$':
|
||||
inc(pos)
|
||||
var tag2 = "$"
|
||||
while buf[pos] in IdentChars:
|
||||
add(tag2, buf[pos])
|
||||
while c.buf[pos] in IdentChars:
|
||||
add(tag2, c.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] == '$': inc(pos)
|
||||
if c.buf[pos] == '$': inc(pos)
|
||||
if tag2 == tag: break
|
||||
add(tok.literal, tag2)
|
||||
add(tok.literal, '$')
|
||||
else:
|
||||
add(tok.literal, buf[pos])
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
c.bufpos = pos
|
||||
|
||||
proc getSymbol(c: var SqlLexer, tok: var Token) =
|
||||
var pos = c.bufpos
|
||||
var buf = c.buf
|
||||
while true:
|
||||
add(tok.literal, buf[pos])
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] notin {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}:
|
||||
if c.buf[pos] notin {'a'..'z','A'..'Z','0'..'9','_','$', '\128'..'\255'}:
|
||||
break
|
||||
c.bufpos = pos
|
||||
tok.kind = tkIdentifier
|
||||
|
||||
proc getQuotedIdentifier(c: var SqlLexer, tok: var Token, quote='\"') =
|
||||
var pos = c.bufpos + 1
|
||||
var buf = c.buf
|
||||
tok.kind = tkQuotedIdentifier
|
||||
while true:
|
||||
var ch = buf[pos]
|
||||
var ch = c.buf[pos]
|
||||
if ch == quote:
|
||||
if buf[pos+1] == quote:
|
||||
if c.buf[pos+1] == quote:
|
||||
inc(pos, 2)
|
||||
add(tok.literal, quote)
|
||||
else:
|
||||
@@ -298,11 +289,10 @@ proc getQuotedIdentifier(c: var SqlLexer, tok: var Token, quote='\"') =
|
||||
|
||||
proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) =
|
||||
var pos = c.bufpos + 1
|
||||
var buf = c.buf
|
||||
block parseLoop:
|
||||
while true:
|
||||
while true:
|
||||
var ch = buf[pos]
|
||||
var ch = c.buf[pos]
|
||||
if ch in validChars:
|
||||
add(tok.literal, ch)
|
||||
inc(pos)
|
||||
@@ -318,9 +308,8 @@ proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) =
|
||||
if c.lineNumber > line:
|
||||
# a new line whitespace has been parsed, so we check if the string
|
||||
# continues after the whitespace:
|
||||
buf = c.buf # may have been reallocated
|
||||
pos = c.bufpos
|
||||
if buf[pos] == '\'': inc(pos)
|
||||
if c.buf[pos] == '\'': inc(pos)
|
||||
else: break parseLoop
|
||||
else: break parseLoop
|
||||
c.bufpos = pos
|
||||
@@ -328,29 +317,28 @@ proc getBitHexString(c: var SqlLexer, tok: var Token, validChars: set[char]) =
|
||||
proc getNumeric(c: var SqlLexer, tok: var Token) =
|
||||
tok.kind = tkInteger
|
||||
var pos = c.bufpos
|
||||
var buf = c.buf
|
||||
while buf[pos] in Digits:
|
||||
add(tok.literal, buf[pos])
|
||||
while c.buf[pos] in Digits:
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] == '.':
|
||||
if c.buf[pos] == '.':
|
||||
tok.kind = tkNumeric
|
||||
add(tok.literal, buf[pos])
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
while buf[pos] in Digits:
|
||||
add(tok.literal, buf[pos])
|
||||
while c.buf[pos] in Digits:
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] in {'E', 'e'}:
|
||||
if c.buf[pos] in {'E', 'e'}:
|
||||
tok.kind = tkNumeric
|
||||
add(tok.literal, buf[pos])
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] == '+':
|
||||
if c.buf[pos] == '+':
|
||||
inc(pos)
|
||||
elif buf[pos] == '-':
|
||||
add(tok.literal, buf[pos])
|
||||
elif c.buf[pos] == '-':
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] in Digits:
|
||||
while buf[pos] in Digits:
|
||||
add(tok.literal, buf[pos])
|
||||
if c.buf[pos] in Digits:
|
||||
while c.buf[pos] in Digits:
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
else:
|
||||
tok.kind = tkInvalid
|
||||
@@ -361,24 +349,23 @@ proc getOperator(c: var SqlLexer, tok: var Token) =
|
||||
'^', '&', '|', '`', '?'}
|
||||
tok.kind = tkOperator
|
||||
var pos = c.bufpos
|
||||
var buf = c.buf
|
||||
var trailingPlusMinus = false
|
||||
while true:
|
||||
case buf[pos]
|
||||
case c.buf[pos]
|
||||
of '-':
|
||||
if buf[pos] == '-': break
|
||||
if not trailingPlusMinus and buf[pos+1] notin operators and
|
||||
if c.buf[pos] == '-': break
|
||||
if not trailingPlusMinus and c.buf[pos+1] notin operators and
|
||||
tok.literal.len > 0: break
|
||||
of '/':
|
||||
if buf[pos] == '*': break
|
||||
if c.buf[pos] == '*': break
|
||||
of '~', '!', '@', '#', '%', '^', '&', '|', '`', '?':
|
||||
trailingPlusMinus = true
|
||||
of '+':
|
||||
if not trailingPlusMinus and buf[pos+1] notin operators and
|
||||
if not trailingPlusMinus and c.buf[pos+1] notin operators and
|
||||
tok.literal.len > 0: break
|
||||
of '*', '<', '>', '=': discard
|
||||
else: break
|
||||
add(tok.literal, buf[pos])
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
c.bufpos = pos
|
||||
|
||||
|
||||
@@ -345,11 +345,10 @@ proc markError(my: var XmlParser, kind: XmlErrorKind) {.inline.} =
|
||||
|
||||
proc parseCDATA(my: var XmlParser) =
|
||||
var pos = my.bufpos + len("<![CDATA[")
|
||||
var buf = my.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of ']':
|
||||
if buf[pos+1] == ']' and buf[pos+2] == '>':
|
||||
if my.buf[pos+1] == ']' and my.buf[pos+2] == '>':
|
||||
inc(pos, 3)
|
||||
break
|
||||
add(my.a, ']')
|
||||
@@ -359,29 +358,25 @@ proc parseCDATA(my: var XmlParser) =
|
||||
break
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\L')
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\L')
|
||||
of '/':
|
||||
pos = lexbase.handleRefillChar(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '/')
|
||||
else:
|
||||
add(my.a, buf[pos])
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos # store back
|
||||
my.kind = xmlCData
|
||||
|
||||
proc parseComment(my: var XmlParser) =
|
||||
var pos = my.bufpos + len("<!--")
|
||||
var buf = my.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '-':
|
||||
if buf[pos+1] == '-' and buf[pos+2] == '>':
|
||||
if my.buf[pos+1] == '-' and my.buf[pos+2] == '>':
|
||||
inc(pos, 3)
|
||||
break
|
||||
if my.options.contains(reportComments): add(my.a, '-')
|
||||
@@ -391,38 +386,32 @@ proc parseComment(my: var XmlParser) =
|
||||
break
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
if my.options.contains(reportComments): add(my.a, '\L')
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
if my.options.contains(reportComments): add(my.a, '\L')
|
||||
of '/':
|
||||
pos = lexbase.handleRefillChar(my, pos)
|
||||
buf = my.buf
|
||||
if my.options.contains(reportComments): add(my.a, '/')
|
||||
else:
|
||||
if my.options.contains(reportComments): add(my.a, buf[pos])
|
||||
if my.options.contains(reportComments): add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
my.kind = xmlComment
|
||||
|
||||
proc parseWhitespace(my: var XmlParser, skip=false) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of ' ', '\t':
|
||||
if not skip: add(my.a, buf[pos])
|
||||
if not skip: add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
of '\c':
|
||||
# the specification says that CR-LF, CR are to be transformed to LF
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
if not skip: add(my.a, '\L')
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
if not skip: add(my.a, '\L')
|
||||
else:
|
||||
break
|
||||
@@ -434,53 +423,51 @@ const
|
||||
|
||||
proc parseName(my: var XmlParser, dest: var string) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if buf[pos] in NameStartChar:
|
||||
if my.buf[pos] in NameStartChar:
|
||||
while true:
|
||||
add(dest, buf[pos])
|
||||
add(dest, my.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] notin NameChar: break
|
||||
if my.buf[pos] notin NameChar: break
|
||||
my.bufpos = pos
|
||||
else:
|
||||
markError(my, errNameExpected)
|
||||
|
||||
proc parseEntity(my: var XmlParser, dest: var string) =
|
||||
var pos = my.bufpos+1
|
||||
var buf = my.buf
|
||||
my.kind = xmlCharData
|
||||
if buf[pos] == '#':
|
||||
if my.buf[pos] == '#':
|
||||
var r: int
|
||||
inc(pos)
|
||||
if buf[pos] == 'x':
|
||||
if my.buf[pos] == 'x':
|
||||
inc(pos)
|
||||
while true:
|
||||
case buf[pos]
|
||||
of '0'..'9': r = (r shl 4) or (ord(buf[pos]) - ord('0'))
|
||||
of 'a'..'f': r = (r shl 4) or (ord(buf[pos]) - ord('a') + 10)
|
||||
of 'A'..'F': r = (r shl 4) or (ord(buf[pos]) - ord('A') + 10)
|
||||
case my.buf[pos]
|
||||
of '0'..'9': r = (r shl 4) or (ord(my.buf[pos]) - ord('0'))
|
||||
of 'a'..'f': r = (r shl 4) or (ord(my.buf[pos]) - ord('a') + 10)
|
||||
of 'A'..'F': r = (r shl 4) or (ord(my.buf[pos]) - ord('A') + 10)
|
||||
else: break
|
||||
inc(pos)
|
||||
else:
|
||||
while buf[pos] in {'0'..'9'}:
|
||||
r = r * 10 + (ord(buf[pos]) - ord('0'))
|
||||
while my.buf[pos] in {'0'..'9'}:
|
||||
r = r * 10 + (ord(my.buf[pos]) - ord('0'))
|
||||
inc(pos)
|
||||
add(dest, toUTF8(Rune(r)))
|
||||
elif buf[pos] == 'l' and buf[pos+1] == 't' and buf[pos+2] == ';':
|
||||
elif my.buf[pos] == 'l' and my.buf[pos+1] == 't' and my.buf[pos+2] == ';':
|
||||
add(dest, '<')
|
||||
inc(pos, 2)
|
||||
elif buf[pos] == 'g' and buf[pos+1] == 't' and buf[pos+2] == ';':
|
||||
elif my.buf[pos] == 'g' and my.buf[pos+1] == 't' and my.buf[pos+2] == ';':
|
||||
add(dest, '>')
|
||||
inc(pos, 2)
|
||||
elif buf[pos] == 'a' and buf[pos+1] == 'm' and buf[pos+2] == 'p' and
|
||||
buf[pos+3] == ';':
|
||||
elif my.buf[pos] == 'a' and my.buf[pos+1] == 'm' and my.buf[pos+2] == 'p' and
|
||||
my.buf[pos+3] == ';':
|
||||
add(dest, '&')
|
||||
inc(pos, 3)
|
||||
elif buf[pos] == 'a' and buf[pos+1] == 'p' and buf[pos+2] == 'o' and
|
||||
buf[pos+3] == 's' and buf[pos+4] == ';':
|
||||
elif my.buf[pos] == 'a' and my.buf[pos+1] == 'p' and my.buf[pos+2] == 'o' and
|
||||
my.buf[pos+3] == 's' and my.buf[pos+4] == ';':
|
||||
add(dest, '\'')
|
||||
inc(pos, 4)
|
||||
elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and
|
||||
buf[pos+3] == 't' and buf[pos+4] == ';':
|
||||
elif my.buf[pos] == 'q' and my.buf[pos+1] == 'u' and my.buf[pos+2] == 'o' and
|
||||
my.buf[pos+3] == 't' and my.buf[pos+4] == ';':
|
||||
add(dest, '"')
|
||||
inc(pos, 4)
|
||||
else:
|
||||
@@ -491,7 +478,7 @@ proc parseEntity(my: var XmlParser, dest: var string) =
|
||||
my.kind = xmlEntity
|
||||
else:
|
||||
add(dest, '&')
|
||||
if buf[pos] == ';':
|
||||
if my.buf[pos] == ';':
|
||||
inc(pos)
|
||||
else:
|
||||
markError(my, errSemicolonExpected)
|
||||
@@ -501,15 +488,14 @@ proc parsePI(my: var XmlParser) =
|
||||
inc(my.bufpos, "<?".len)
|
||||
parseName(my, my.a)
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
setLen(my.b, 0)
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '\0':
|
||||
markError(my, errQmGtExpected)
|
||||
break
|
||||
of '?':
|
||||
if buf[pos+1] == '>':
|
||||
if my.buf[pos+1] == '>':
|
||||
inc(pos, 2)
|
||||
break
|
||||
add(my.b, '?')
|
||||
@@ -517,18 +503,15 @@ proc parsePI(my: var XmlParser) =
|
||||
of '\c':
|
||||
# the specification says that CR-LF, CR are to be transformed to LF
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
add(my.b, '\L')
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
add(my.b, '\L')
|
||||
of '/':
|
||||
pos = lexbase.handleRefillChar(my, pos)
|
||||
buf = my.buf
|
||||
add(my.b, '/')
|
||||
else:
|
||||
add(my.b, buf[pos])
|
||||
add(my.b, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
my.kind = xmlPI
|
||||
@@ -536,10 +519,9 @@ proc parsePI(my: var XmlParser) =
|
||||
proc parseSpecial(my: var XmlParser) =
|
||||
# things that start with <!
|
||||
var pos = my.bufpos + 2
|
||||
var buf = my.buf
|
||||
var opentags = 0
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '\0':
|
||||
markError(my, errGtExpected)
|
||||
break
|
||||
@@ -556,18 +538,15 @@ proc parseSpecial(my: var XmlParser) =
|
||||
add(my.a, '>')
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\L')
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\L')
|
||||
of '/':
|
||||
pos = lexbase.handleRefillChar(my, pos)
|
||||
buf = my.buf
|
||||
add(my.b, '/')
|
||||
else:
|
||||
add(my.a, buf[pos])
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
my.kind = xmlSpecial
|
||||
@@ -635,13 +614,12 @@ proc parseAttribute(my: var XmlParser) =
|
||||
parseWhitespace(my, skip=true)
|
||||
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if buf[pos] in {'\'', '"'}:
|
||||
var quote = buf[pos]
|
||||
if my.buf[pos] in {'\'', '"'}:
|
||||
var quote = my.buf[pos]
|
||||
var pendingSpace = false
|
||||
inc(pos)
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '\0':
|
||||
markError(my, errQuoteExpected)
|
||||
break
|
||||
@@ -658,31 +636,28 @@ proc parseAttribute(my: var XmlParser) =
|
||||
inc(pos)
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
pendingSpace = true
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
pendingSpace = true
|
||||
of '/':
|
||||
pos = lexbase.handleRefillChar(my, pos)
|
||||
buf = my.buf
|
||||
add(my.b, '/')
|
||||
else:
|
||||
if buf[pos] == quote:
|
||||
if my.buf[pos] == quote:
|
||||
inc(pos)
|
||||
break
|
||||
else:
|
||||
if pendingSpace:
|
||||
add(my.b, ' ')
|
||||
pendingSpace = false
|
||||
add(my.b, buf[pos])
|
||||
add(my.b, my.buf[pos])
|
||||
inc(pos)
|
||||
elif allowUnquotedAttribs in my.options:
|
||||
const disallowedChars = {'"', '\'', '`', '=', '<', '>', ' ',
|
||||
'\0', '\t', '\L', '\F', '\f'}
|
||||
let startPos = pos
|
||||
while (let c = buf[pos]; c notin disallowedChars):
|
||||
while (let c = my.buf[pos]; c notin disallowedChars):
|
||||
if c == '&':
|
||||
my.bufpos = pos
|
||||
parseEntity(my, my.b)
|
||||
@@ -696,33 +671,29 @@ proc parseAttribute(my: var XmlParser) =
|
||||
else:
|
||||
markError(my, errQuoteExpected)
|
||||
# error corrections: guess what was meant
|
||||
while buf[pos] != '>' and buf[pos] > ' ':
|
||||
add(my.b, buf[pos])
|
||||
while my.buf[pos] != '>' and my.buf[pos] > ' ':
|
||||
add(my.b, my.buf[pos])
|
||||
inc pos
|
||||
my.bufpos = pos
|
||||
parseWhitespace(my, skip=true)
|
||||
|
||||
proc parseCharData(my: var XmlParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '\0', '<', '&': break
|
||||
of '\c':
|
||||
# the specification says that CR-LF, CR are to be transformed to LF
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\L')
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\L')
|
||||
of '/':
|
||||
pos = lexbase.handleRefillChar(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '/')
|
||||
else:
|
||||
add(my.a, buf[pos])
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
my.kind = xmlCharData
|
||||
@@ -731,18 +702,17 @@ proc rawGetTok(my: var XmlParser) =
|
||||
my.kind = xmlError
|
||||
setLen(my.a, 0)
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '<':
|
||||
case buf[pos+1]
|
||||
case my.buf[pos+1]
|
||||
of '/':
|
||||
parseEndTag(my)
|
||||
of '!':
|
||||
if buf[pos+2] == '[' and buf[pos+3] == 'C' and buf[pos+4] == 'D' and
|
||||
buf[pos+5] == 'A' and buf[pos+6] == 'T' and buf[pos+7] == 'A' and
|
||||
buf[pos+8] == '[':
|
||||
if my.buf[pos+2] == '[' and my.buf[pos+3] == 'C' and my.buf[pos+4] == 'D' and
|
||||
my.buf[pos+5] == 'A' and my.buf[pos+6] == 'T' and my.buf[pos+7] == 'A' and
|
||||
my.buf[pos+8] == '[':
|
||||
parseCDATA(my)
|
||||
elif buf[pos+2] == '-' and buf[pos+3] == '-':
|
||||
elif my.buf[pos+2] == '-' and my.buf[pos+3] == '-':
|
||||
parseComment(my)
|
||||
else:
|
||||
parseSpecial(my)
|
||||
@@ -841,4 +811,3 @@ when not defined(testing) and isMainModule:
|
||||
of xmlSpecial:
|
||||
echo("SPECIAL: " & x.charData)
|
||||
close(x)
|
||||
|
||||
|
||||
@@ -1545,20 +1545,17 @@ proc getEscapedChar(c: var PegLexer, tok: var Token) =
|
||||
|
||||
proc skip(c: var PegLexer) =
|
||||
var pos = c.bufpos
|
||||
var buf = c.buf
|
||||
while pos < c.buf.len:
|
||||
case buf[pos]
|
||||
case c.buf[pos]
|
||||
of ' ', '\t':
|
||||
inc(pos)
|
||||
of '#':
|
||||
while (pos < c.buf.len) and
|
||||
not (buf[pos] in {'\c', '\L', '\0'}): inc(pos)
|
||||
not (c.buf[pos] in {'\c', '\L', '\0'}): inc(pos)
|
||||
of '\c':
|
||||
pos = handleCR(c, pos)
|
||||
buf = c.buf
|
||||
of '\L':
|
||||
pos = handleLF(c, pos)
|
||||
buf = c.buf
|
||||
else:
|
||||
break # EndOfFile also leaves the loop
|
||||
c.bufpos = pos
|
||||
@@ -1566,10 +1563,9 @@ proc skip(c: var PegLexer) =
|
||||
proc getString(c: var PegLexer, tok: var Token) =
|
||||
tok.kind = tkStringLit
|
||||
var pos = c.bufpos + 1
|
||||
var buf = c.buf
|
||||
var quote = buf[pos-1]
|
||||
var quote = c.buf[pos-1]
|
||||
while pos < c.buf.len:
|
||||
case buf[pos]
|
||||
case c.buf[pos]
|
||||
of '\\':
|
||||
c.bufpos = pos
|
||||
getEscapedChar(c, tok)
|
||||
@@ -1577,22 +1573,21 @@ proc getString(c: var PegLexer, tok: var Token) =
|
||||
of '\c', '\L', '\0':
|
||||
tok.kind = tkInvalid
|
||||
break
|
||||
elif buf[pos] == quote:
|
||||
elif c.buf[pos] == quote:
|
||||
inc(pos)
|
||||
break
|
||||
else:
|
||||
add(tok.literal, buf[pos])
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
c.bufpos = pos
|
||||
|
||||
proc getDollar(c: var PegLexer, tok: var Token) =
|
||||
var pos = c.bufpos + 1
|
||||
var buf = c.buf
|
||||
if buf[pos] in {'0'..'9'}:
|
||||
if c.buf[pos] in {'0'..'9'}:
|
||||
tok.kind = tkBackref
|
||||
tok.index = 0
|
||||
while pos < c.buf.len and buf[pos] in {'0'..'9'}:
|
||||
tok.index = tok.index * 10 + ord(buf[pos]) - ord('0')
|
||||
while pos < c.buf.len and c.buf[pos] in {'0'..'9'}:
|
||||
tok.index = tok.index * 10 + ord(c.buf[pos]) - ord('0')
|
||||
inc(pos)
|
||||
else:
|
||||
tok.kind = tkDollar
|
||||
@@ -1602,14 +1597,13 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
|
||||
tok.kind = tkCharSet
|
||||
tok.charset = {}
|
||||
var pos = c.bufpos + 1
|
||||
var buf = c.buf
|
||||
var caret = false
|
||||
if buf[pos] == '^':
|
||||
if c.buf[pos] == '^':
|
||||
inc(pos)
|
||||
caret = true
|
||||
while pos < c.buf.len:
|
||||
var ch: char
|
||||
case buf[pos]
|
||||
case c.buf[pos]
|
||||
of ']':
|
||||
if pos < c.buf.len: inc(pos)
|
||||
break
|
||||
@@ -1622,11 +1616,11 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
|
||||
tok.kind = tkInvalid
|
||||
break
|
||||
else:
|
||||
ch = buf[pos]
|
||||
ch = c.buf[pos]
|
||||
inc(pos)
|
||||
incl(tok.charset, ch)
|
||||
if buf[pos] == '-':
|
||||
if pos+1 < c.buf.len and buf[pos+1] == ']':
|
||||
if c.buf[pos] == '-':
|
||||
if pos+1 < c.buf.len and c.buf[pos+1] == ']':
|
||||
incl(tok.charset, '-')
|
||||
inc(pos)
|
||||
else:
|
||||
@@ -1635,7 +1629,7 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
|
||||
else:
|
||||
break
|
||||
var ch2: char
|
||||
case buf[pos]
|
||||
case c.buf[pos]
|
||||
of '\\':
|
||||
c.bufpos = pos
|
||||
getEscapedChar(c, tok)
|
||||
@@ -1646,7 +1640,7 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
|
||||
break
|
||||
else:
|
||||
if pos+1 < c.buf.len:
|
||||
ch2 = buf[pos]
|
||||
ch2 = c.buf[pos]
|
||||
inc(pos)
|
||||
else:
|
||||
break
|
||||
@@ -1657,11 +1651,10 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
|
||||
|
||||
proc getSymbol(c: var PegLexer, tok: var Token) =
|
||||
var pos = c.bufpos
|
||||
var buf = c.buf
|
||||
while pos < c.buf.len:
|
||||
add(tok.literal, buf[pos])
|
||||
add(tok.literal, c.buf[pos])
|
||||
inc(pos)
|
||||
if pos < buf.len and buf[pos] notin strutils.IdentChars: break
|
||||
if pos < c.buf.len and c.buf[pos] notin strutils.IdentChars: break
|
||||
c.bufpos = pos
|
||||
tok.kind = tkIdentifier
|
||||
|
||||
|
||||
@@ -53,12 +53,17 @@ type
|
||||
{.nimcall, raises: [Defect, IOError, OSError], tags: [], gcsafe.}
|
||||
getPositionImpl*: proc (s: Stream): int
|
||||
{.nimcall, raises: [Defect, IOError, OSError], tags: [], gcsafe.}
|
||||
|
||||
readDataStrImpl*: proc (s: Stream, buffer: var string, slice: Slice[int]): int
|
||||
{.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.}
|
||||
|
||||
readDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int): int
|
||||
{.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.}
|
||||
peekDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int): int
|
||||
{.nimcall, raises: [Defect, IOError, OSError], tags: [ReadIOEffect], gcsafe.}
|
||||
writeDataImpl*: proc (s: Stream, buffer: pointer, bufLen: int)
|
||||
{.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.}
|
||||
{.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.}
|
||||
|
||||
flushImpl*: proc (s: Stream)
|
||||
{.nimcall, raises: [Defect, IOError, OSError], tags: [WriteIOEffect], gcsafe.}
|
||||
|
||||
@@ -87,6 +92,14 @@ proc readData*(s: Stream, buffer: pointer, bufLen: int): int =
|
||||
## low level proc that reads data into an untyped `buffer` of `bufLen` size.
|
||||
result = s.readDataImpl(s, buffer, bufLen)
|
||||
|
||||
proc readDataStr*(s: Stream, buffer: var string, slice: Slice[int]): int =
  ## Low level proc that reads data into a string ``buffer`` at ``slice``.
  ##
  ## If the stream provides a ``readDataStrImpl`` it is used directly.
  ## Otherwise the call falls back to ``readData`` on the memory that backs
  ## ``buffer[slice.a .. slice.b]``. The result is whatever the underlying
  ## implementation reports; an empty ``slice`` yields 0 in the fallback.
  if s.readDataStrImpl != nil:
    result = s.readDataStrImpl(s, buffer, slice)
  else:
    # Fallback: read only into the requested window. Passing `buffer[0]` and
    # `buffer.len` here would ignore `slice` and overwrite the buffer from
    # its start, unlike the dedicated string/file implementations.
    let count = slice.b + 1 - slice.a
    if count > 0:
      result = s.readData(addr buffer[slice.a], count)
    else:
      result = 0
|
||||
|
||||
when not defined(js):
|
||||
proc readAll*(s: Stream): string =
|
||||
## Reads all available data.
|
||||
@@ -344,6 +357,19 @@ when not defined(js):
|
||||
var s = StringStream(s)
|
||||
return s.pos
|
||||
|
||||
proc ssReadDataStr(s: Stream, buffer: var string, slice: Slice[int]): int =
  ## String-stream backend for ``readDataStrImpl``.
  ##
  ## Copies at most ``slice.b + 1 - slice.a`` bytes, limited by what is left
  ## in the stream's data after the current position, into ``buffer``
  ## starting at ``slice.a``, then advances the position by the amount
  ## copied. Yields 0 when there is nothing to copy.
  var strm = StringStream(s)
  let
    wanted = slice.b + 1 - slice.a
    available = strm.data.len - strm.pos
  result = min(wanted, available)
  if result <= 0:
    return 0
  when nimvm:
    # sorry, but no fast string splicing on the vm: copy char by char.
    for offset in 0 ..< result:
      buffer[slice.a + offset] = strm.data[strm.pos + offset]
  else:
    copyMem(unsafeAddr buffer[slice.a], addr strm.data[strm.pos], result)
  inc(strm.pos, result)
|
||||
|
||||
proc ssReadData(s: Stream, buffer: pointer, bufLen: int): int =
|
||||
var s = StringStream(s)
|
||||
result = min(bufLen, s.data.len - s.pos)
|
||||
@@ -389,6 +415,7 @@ when not defined(js):
|
||||
result.readDataImpl = ssReadData
|
||||
result.peekDataImpl = ssPeekData
|
||||
result.writeDataImpl = ssWriteData
|
||||
result.readDataStrImpl = ssReadDataStr
|
||||
|
||||
type
|
||||
FileStream* = ref FileStreamObj ## a stream that encapsulates a `File`
|
||||
@@ -407,6 +434,9 @@ when not defined(js):
|
||||
proc fsReadData(s: Stream, buffer: pointer, bufLen: int): int =
|
||||
result = readBuffer(FileStream(s).f, buffer, bufLen)
|
||||
|
||||
proc fsReadDataStr(s: Stream, buffer: var string, slice: Slice[int]): int =
  ## File-stream backend for ``readDataStrImpl``: hands the memory starting
  ## at ``buffer[slice.a]`` and the slice length to ``readBuffer`` on the
  ## underlying file and returns its result.
  let wanted = slice.b + 1 - slice.a
  readBuffer(FileStream(s).f, addr buffer[slice.a], wanted)
|
||||
|
||||
proc fsPeekData(s: Stream, buffer: pointer, bufLen: int): int =
|
||||
let pos = fsGetPosition(s)
|
||||
defer: fsSetPosition(s, pos)
|
||||
@@ -424,6 +454,7 @@ when not defined(js):
|
||||
result.atEndImpl = fsAtEnd
|
||||
result.setPositionImpl = fsSetPosition
|
||||
result.getPositionImpl = fsGetPosition
|
||||
result.readDataStrImpl = fsReadDataStr
|
||||
result.readDataImpl = fsReadData
|
||||
result.peekDataImpl = fsPeekData
|
||||
result.writeDataImpl = fsWriteData
|
||||
|
||||
@@ -124,9 +124,8 @@ proc handleHexChar(c: char, x: var int): bool =
|
||||
proc parseString(my: var SexpParser): TTokKind =
|
||||
result = tkString
|
||||
var pos = my.bufpos + 1
|
||||
var buf = my.buf
|
||||
while true:
|
||||
case buf[pos]
|
||||
case my.buf[pos]
|
||||
of '\0':
|
||||
my.err = errQuoteExpected
|
||||
result = tkError
|
||||
@@ -135,9 +134,9 @@ proc parseString(my: var SexpParser): TTokKind =
|
||||
inc(pos)
|
||||
break
|
||||
of '\\':
|
||||
case buf[pos+1]
|
||||
case my.buf[pos+1]
|
||||
of '\\', '"', '\'', '/':
|
||||
add(my.a, buf[pos+1])
|
||||
add(my.a, my.buf[pos+1])
|
||||
inc(pos, 2)
|
||||
of 'b':
|
||||
add(my.a, '\b')
|
||||
@@ -157,65 +156,61 @@ proc parseString(my: var SexpParser): TTokKind =
|
||||
of 'u':
|
||||
inc(pos, 2)
|
||||
var r: int
|
||||
if handleHexChar(buf[pos], r): inc(pos)
|
||||
if handleHexChar(buf[pos], r): inc(pos)
|
||||
if handleHexChar(buf[pos], r): inc(pos)
|
||||
if handleHexChar(buf[pos], r): inc(pos)
|
||||
if handleHexChar(my.buf[pos], r): inc(pos)
|
||||
if handleHexChar(my.buf[pos], r): inc(pos)
|
||||
if handleHexChar(my.buf[pos], r): inc(pos)
|
||||
if handleHexChar(my.buf[pos], r): inc(pos)
|
||||
add(my.a, toUTF8(Rune(r)))
|
||||
else:
|
||||
# don't bother with the error
|
||||
add(my.a, buf[pos])
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
of '\c':
|
||||
pos = lexbase.handleCR(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\c')
|
||||
of '\L':
|
||||
pos = lexbase.handleLF(my, pos)
|
||||
buf = my.buf
|
||||
add(my.a, '\L')
|
||||
else:
|
||||
add(my.a, buf[pos])
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos # store back
|
||||
|
||||
proc parseNumber(my: var SexpParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if buf[pos] == '-':
|
||||
if my.buf[pos] == '-':
|
||||
add(my.a, '-')
|
||||
inc(pos)
|
||||
if buf[pos] == '.':
|
||||
if my.buf[pos] == '.':
|
||||
add(my.a, "0.")
|
||||
inc(pos)
|
||||
else:
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
while my.buf[pos] in Digits:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] == '.':
|
||||
if my.buf[pos] == '.':
|
||||
add(my.a, '.')
|
||||
inc(pos)
|
||||
# digits after the dot:
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
while my.buf[pos] in Digits:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] in {'E', 'e'}:
|
||||
add(my.a, buf[pos])
|
||||
if my.buf[pos] in {'E', 'e'}:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
if buf[pos] in {'+', '-'}:
|
||||
add(my.a, buf[pos])
|
||||
if my.buf[pos] in {'+', '-'}:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
while buf[pos] in Digits:
|
||||
add(my.a, buf[pos])
|
||||
while my.buf[pos] in Digits:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
|
||||
proc parseSymbol(my: var SexpParser) =
|
||||
var pos = my.bufpos
|
||||
var buf = my.buf
|
||||
if buf[pos] in IdentStartChars:
|
||||
while buf[pos] in IdentChars:
|
||||
add(my.a, buf[pos])
|
||||
if my.buf[pos] in IdentStartChars:
|
||||
while my.buf[pos] in IdentChars:
|
||||
add(my.a, my.buf[pos])
|
||||
inc(pos)
|
||||
my.bufpos = pos
|
||||
|
||||
|
||||
@@ -520,3 +520,43 @@ when true:
|
||||
block test_tuple:
|
||||
doAssert $(%* (a1: 10, a2: "foo")) == """{"a1":10,"a2":"foo"}"""
|
||||
doAssert $(%* (10, "foo")) == """[10,"foo"]"""
|
||||
|
||||
# TODO: when the issue with the limited vm registers is solved, the
|
||||
# exact same test as above should be evaluated at compile time as
|
||||
# well, to ensure that the vm functionality won't diverge from the
|
||||
# runtime functionality. Until then, the following test should do it.
|
||||
|
||||
static:
|
||||
var t = parseJson("""
|
||||
{
|
||||
"name":"Bongo",
|
||||
"email":"bongo@bingo.com",
|
||||
"list": [11,7,15],
|
||||
"year": 1975,
|
||||
"dict": {"a": 1, "b": 2},
|
||||
"arr": [1.0, 2.0, 7.0],
|
||||
"person": {"name": "boney"},
|
||||
"dog": {"name": "honey"},
|
||||
"fruit": {"color": 10},
|
||||
"distfruit": {"color": 11},
|
||||
"emails": ["abc", "123"]
|
||||
}
|
||||
""")
|
||||
|
||||
doAssert t["name"].getStr == "Bongo"
|
||||
doAssert t["email"].getStr == "bongo@bingo.com"
|
||||
doAssert t["list"][0].getInt == 11
|
||||
doAssert t["list"][1].getInt == 7
|
||||
doAssert t["list"][2].getInt == 15
|
||||
doAssert t["year"].getInt == 1975
|
||||
doAssert t["dict"]["a"].getInt == 1
|
||||
doAssert t["dict"]["b"].getInt == 2
|
||||
doAssert t["arr"][0].getFloat == 1.0
|
||||
doAssert t["arr"][1].getFloat == 2.0
|
||||
doAssert t["arr"][2].getFloat == 7.0
|
||||
doAssert t["person"]["name"].getStr == "boney"
|
||||
doAssert t["distfruit"]["color"].getInt == 11
|
||||
doAssert t["dog"]["name"].getStr == "honey"
|
||||
doAssert t["fruit"]["color"].getInt == 10
|
||||
doAssert t["emails"][0].getStr == "abc"
|
||||
doAssert t["emails"][1].getStr == "123"
|
||||
|
||||
Reference in New Issue
Block a user