Update pegs.nim to work at compile time. No range errors. (#13459)

This commit is contained in:
solo989
2020-02-27 02:08:57 -08:00
committed by GitHub
parent 6a0e87eb38
commit e84e01cb8c

View File

@@ -1420,7 +1420,7 @@ type
PegLexer {.inheritable.} = object ## the lexer object.
bufpos: int ## the current position within the buffer
buf: cstring ## the buffer itself
buf: string ## the buffer itself
lineNumber: int ## the current line number
lineStart: int ## index of last line start in buffer
colOffset: int ## column to add
@@ -1481,6 +1481,9 @@ proc handleHexChar(c: var PegLexer, xi: var int) =
proc getEscapedChar(c: var PegLexer, tok: var Token) =
inc(c.bufpos)
if c.bufpos >= len(c.buf):
tok.kind = tkInvalid
return
case c.buf[c.bufpos]
of 'r', 'R', 'c', 'C':
add(tok.literal, '\c')
@@ -1508,6 +1511,9 @@ proc getEscapedChar(c: var PegLexer, tok: var Token) =
inc(c.bufpos)
of 'x', 'X':
inc(c.bufpos)
if c.bufpos >= len(c.buf):
tok.kind = tkInvalid
return
var xi = 0
handleHexChar(c, xi)
handleHexChar(c, xi)
@@ -1517,7 +1523,7 @@ proc getEscapedChar(c: var PegLexer, tok: var Token) =
var val = ord(c.buf[c.bufpos]) - ord('0')
inc(c.bufpos)
var i = 1
while (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}):
while (c.bufpos < len(c.buf)) and (i <= 3) and (c.buf[c.bufpos] in {'0'..'9'}):
val = val * 10 + ord(c.buf[c.bufpos]) - ord('0')
inc(c.bufpos)
inc(i)
@@ -1571,7 +1577,7 @@ proc getString(c: var PegLexer, tok: var Token) =
proc getDollar(c: var PegLexer, tok: var Token) =
var pos = c.bufpos + 1
if c.buf[pos] in {'0'..'9'}:
if pos < c.buf.len and c.buf[pos] in {'0'..'9'}:
tok.kind = tkBackref
tok.index = 0
while pos < c.buf.len and c.buf[pos] in {'0'..'9'}:
@@ -1586,54 +1592,55 @@ proc getCharSet(c: var PegLexer, tok: var Token) =
tok.charset = {}
var pos = c.bufpos + 1
var caret = false
if c.buf[pos] == '^':
inc(pos)
caret = true
while pos < c.buf.len:
var ch: char
case c.buf[pos]
of ']':
if pos < c.buf.len: inc(pos)
break
of '\\':
c.bufpos = pos
getEscapedChar(c, tok)
pos = c.bufpos
ch = tok.literal[tok.literal.len-1]
of '\C', '\L', '\0':
tok.kind = tkInvalid
break
else:
ch = c.buf[pos]
if pos < c.buf.len:
if c.buf[pos] == '^':
inc(pos)
incl(tok.charset, ch)
if c.buf[pos] == '-':
if pos+1 < c.buf.len and c.buf[pos+1] == ']':
incl(tok.charset, '-')
inc(pos)
caret = true
while pos < c.buf.len:
var ch: char
case c.buf[pos]
of ']':
if pos < c.buf.len: inc(pos)
break
of '\\':
c.bufpos = pos
getEscapedChar(c, tok)
pos = c.bufpos
ch = tok.literal[tok.literal.len-1]
of '\C', '\L', '\0':
tok.kind = tkInvalid
break
else:
if pos+1 < c.buf.len:
ch = c.buf[pos]
inc(pos)
incl(tok.charset, ch)
if c.buf[pos] == '-':
if pos+1 < c.buf.len and c.buf[pos+1] == ']':
incl(tok.charset, '-')
inc(pos)
else:
break
var ch2: char
case c.buf[pos]
of '\\':
c.bufpos = pos
getEscapedChar(c, tok)
pos = c.bufpos
ch2 = tok.literal[tok.literal.len-1]
of '\C', '\L', '\0':
tok.kind = tkInvalid
break
else:
if pos+1 < c.buf.len:
ch2 = c.buf[pos]
inc(pos)
else:
break
for i in ord(ch)+1 .. ord(ch2):
incl(tok.charset, chr(i))
var ch2: char
case c.buf[pos]
of '\\':
c.bufpos = pos
getEscapedChar(c, tok)
pos = c.bufpos
ch2 = tok.literal[tok.literal.len-1]
of '\C', '\L', '\0':
tok.kind = tkInvalid
break
else:
if pos+1 < c.buf.len:
ch2 = c.buf[pos]
inc(pos)
else:
break
for i in ord(ch)+1 .. ord(ch2):
incl(tok.charset, chr(i))
c.bufpos = pos
if caret: tok.charset = {'\1'..'\xFF'} - tok.charset
@@ -1661,6 +1668,13 @@ proc getTok(c: var PegLexer, tok: var Token) =
setLen(tok.literal, 0)
skip(c)
if c.bufpos >= c.buf.len:
tok.kind = tkEof
tok.literal = "[EOF]"
add(tok.literal, '\0')
inc(c.bufpos)
return
case c.buf[c.bufpos]
of '{':
inc(c.bufpos)
@@ -1700,6 +1714,8 @@ proc getTok(c: var PegLexer, tok: var Token) =
of '$': getDollar(c, tok)
of 'a'..'z', 'A'..'Z', '\128'..'\255':
getSymbol(c, tok)
if c.bufpos >= c.buf.len:
return
if c.buf[c.bufpos] in {'\'', '"'} or
c.buf[c.bufpos] == '$' and c.bufpos+1 < c.buf.len and
c.buf[c.bufpos+1] in {'0'..'9'}:
@@ -1768,7 +1784,9 @@ proc arrowIsNextTok(c: PegLexer): bool =
# the only look ahead we need
var pos = c.bufpos
while pos < c.buf.len and c.buf[pos] in {'\t', ' '}: inc(pos)
result = c.buf[pos] == '<' and (pos+1 < c.buf.len) and c.buf[pos+1] == '-'
if pos+1 >= c.buf.len:
return
result = c.buf[pos] == '<' and c.buf[pos+1] == '-'
# ----------------------------- parser ----------------------------------------
@@ -2038,141 +2056,145 @@ proc escapePeg*(s: string): string =
if inQuote: result.add('\'')
when isMainModule:
assert escapePeg("abc''def'") == r"'abc'\x27\x27'def'\x27"
assert match("(a b c)", peg"'(' @ ')'")
assert match("W_HI_Le", peg"\y 'while'")
assert(not match("W_HI_L", peg"\y 'while'"))
assert(not match("W_HI_Le", peg"\y v'while'"))
assert match("W_HI_Le", peg"y'while'")
proc pegsTest() =
assert escapePeg("abc''def'") == r"'abc'\x27\x27'def'\x27"
assert match("(a b c)", peg"'(' @ ')'")
assert match("W_HI_Le", peg"\y 'while'")
assert(not match("W_HI_L", peg"\y 'while'"))
assert(not match("W_HI_Le", peg"\y v'while'"))
assert match("W_HI_Le", peg"y'while'")
assert($ +digits == $peg"\d+")
assert "0158787".match(peg"\d+")
assert "ABC 0232".match(peg"\w+\s+\d+")
assert "ABC".match(peg"\d+ / \w+")
assert($ +digits == $peg"\d+")
assert "0158787".match(peg"\d+")
assert "ABC 0232".match(peg"\w+\s+\d+")
assert "ABC".match(peg"\d+ / \w+")
var accum: seq[string] = @[]
for word in split("00232this02939is39an22example111", peg"\d+"):
accum.add(word)
assert(accum == @["this", "is", "an", "example"])
var accum: seq[string] = @[]
for word in split("00232this02939is39an22example111", peg"\d+"):
accum.add(word)
assert(accum == @["this", "is", "an", "example"])
assert matchLen("key", ident) == 3
assert matchLen("key", ident) == 3
var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident)
assert matchLen("key1= cal9", pattern) == 11
var pattern = sequence(ident, *whitespace, term('='), *whitespace, ident)
assert matchLen("key1= cal9", pattern) == 11
var ws = newNonTerminal("ws", 1, 1)
ws.rule = *whitespace
var ws = newNonTerminal("ws", 1, 1)
ws.rule = *whitespace
var expr = newNonTerminal("expr", 1, 1)
expr.rule = sequence(capture(ident), *sequence(
nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr)))
var expr = newNonTerminal("expr", 1, 1)
expr.rule = sequence(capture(ident), *sequence(
nonterminal(ws), term('+'), nonterminal(ws), nonterminal(expr)))
var c: Captures
var s = "a+b + c +d+e+f"
assert rawMatch(s, expr.rule, 0, c) == len(s)
var a = ""
for i in 0..c.ml-1:
a.add(substr(s, c.matches[i][0], c.matches[i][1]))
assert a == "abcdef"
#echo expr.rule
var c: Captures
var s = "a+b + c +d+e+f"
assert rawMatch(s, expr.rule, 0, c) == len(s)
var a = ""
for i in 0..c.ml-1:
a.add(substr(s, c.matches[i][0], c.matches[i][1]))
assert a == "abcdef"
#echo expr.rule
#const filename = "lib/devel/peg/grammar.txt"
#var grammar = parsePeg(newFileStream(filename, fmRead), filename)
#echo "a <- [abc]*?".match(grammar)
assert find("_____abc_______", term("abc"), 2) == 5
assert match("_______ana", peg"A <- 'ana' / . A")
assert match("abcs%%%", peg"A <- ..A / .A / '%'")
#const filename = "lib/devel/peg/grammar.txt"
#var grammar = parsePeg(newFileStream(filename, fmRead), filename)
#echo "a <- [abc]*?".match(grammar)
assert find("_____abc_______", term("abc"), 2) == 5
assert match("_______ana", peg"A <- 'ana' / . A")
assert match("abcs%%%", peg"A <- ..A / .A / '%'")
var matches: array[0..MaxSubpatterns-1, string]
if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}":
assert matches[0] == "abc"
else:
assert false
var matches: array[0..MaxSubpatterns-1, string]
if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}":
assert matches[0] == "abc"
else:
assert false
var g2 = peg"""S <- A B / C D
A <- 'a'+
B <- 'b'+
C <- 'c'+
D <- 'd'+
"""
assert($g2 == "((A B) / (C D))")
assert match("cccccdddddd", g2)
assert("var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
"var1<-keykey; var2<-key2key2")
assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
"$1<-$2$2; $1<-$2$2")
assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}")
var g2 = peg"""S <- A B / C D
A <- 'a'+
B <- 'b'+
C <- 'c'+
D <- 'd'+
"""
assert($g2 == "((A B) / (C D))")
assert match("cccccdddddd", g2)
assert("var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
"var1<-keykey; var2<-key2key2")
assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
"$1<-$2$2; $1<-$2$2")
assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}")
if "aaaaaa" =~ peg"'aa' !. / ({'a'})+":
assert matches[0] == "a"
else:
assert false
if "aaaaaa" =~ peg"'aa' !. / ({'a'})+":
assert matches[0] == "a"
else:
assert false
if match("abcdefg", peg"c {d} ef {g}", matches, 2):
assert matches[0] == "d"
assert matches[1] == "g"
else:
assert false
if match("abcdefg", peg"c {d} ef {g}", matches, 2):
assert matches[0] == "d"
assert matches[1] == "g"
else:
assert false
accum = @[]
for x in findAll("abcdef", peg".", 3):
accum.add(x)
assert(accum == @["d", "e", "f"])
accum = @[]
for x in findAll("abcdef", peg".", 3):
accum.add(x)
assert(accum == @["d", "e", "f"])
for x in findAll("abcdef", peg"^{.}", 3):
assert x == "d"
for x in findAll("abcdef", peg"^{.}", 3):
assert x == "d"
if "f(a, b)" =~ peg"{[0-9]+} / ({\ident} '(' {@} ')')":
assert matches[0] == "f"
assert matches[1] == "a, b"
else:
assert false
if "f(a, b)" =~ peg"{[0-9]+} / ({\ident} '(' {@} ')')":
assert matches[0] == "f"
assert matches[1] == "a, b"
else:
assert false
assert match("eine übersicht und außerdem", peg"(\letter \white*)+")
# ß is not a lower cased letter?!
assert match("eine übersicht und auerdem", peg"(\lower \white*)+")
assert match("EINE ÜBERSICHT UND AUSSERDEM", peg"(\upper \white*)+")
assert(not match("456678", peg"(\letter)+"))
assert match("eine übersicht und außerdem", peg"(\letter \white*)+")
# ß is not a lower cased letter?!
assert match("eine übersicht und auerdem", peg"(\lower \white*)+")
assert match("EINE ÜBERSICHT UND AUSSERDEM", peg"(\upper \white*)+")
assert(not match("456678", peg"(\letter)+"))
assert("var1 = key; var2 = key2".replacef(
peg"\skip(\s*) {\ident}'='{\ident}", "$1<-$2$2") ==
"var1<-keykey;var2<-key2key2")
assert("var1 = key; var2 = key2".replacef(
peg"\skip(\s*) {\ident}'='{\ident}", "$1<-$2$2") ==
"var1<-keykey;var2<-key2key2")
assert match("prefix/start", peg"^start$", 7)
assert match("prefix/start", peg"^start$", 7)
if "foo" =~ peg"{'a'}?.*":
assert matches[0].len == 0
else: assert false
if "foo" =~ peg"{'a'}?.*":
assert matches[0].len == 0
else: assert false
if "foo" =~ peg"{''}.*":
assert matches[0] == ""
else: assert false
if "foo" =~ peg"{''}.*":
assert matches[0] == ""
else: assert false
if "foo" =~ peg"{'foo'}":
assert matches[0] == "foo"
else: assert false
if "foo" =~ peg"{'foo'}":
assert matches[0] == "foo"
else: assert false
let empty_test = peg"^\d*"
let str = "XYZ"
let empty_test = peg"^\d*"
let str = "XYZ"
assert(str.find(empty_test) == 0)
assert(str.match(empty_test))
assert(str.find(empty_test) == 0)
assert(str.match(empty_test))
proc handleMatches*(m: int, n: int, c: openArray[string]): string =
result = ""
proc handleMatches(m: int, n: int, c: openArray[string]): string =
result = ""
if m > 0:
result.add ", "
if m > 0:
result.add ", "
result.add case n:
of 2: toLowerAscii(c[0]) & ": '" & c[1] & "'"
of 1: toLowerAscii(c[0]) & ": ''"
else: ""
result.add case n:
of 2: toLowerAscii(c[0]) & ": '" & c[1] & "'"
of 1: toLowerAscii(c[0]) & ": ''"
else: ""
assert("Var1=key1;var2=Key2; VAR3".
replace(peg"{\ident}('='{\ident})* ';'* \s*",
handleMatches) == "var1: 'key1', var2: 'Key2', var3: ''")
assert("Var1=key1;var2=Key2; VAR3".
replace(peg"{\ident}('='{\ident})* ';'* \s*",
handleMatches) == "var1: 'key1', var2: 'Key2', var3: ''")
doAssert "test1".match(peg"""{@}$""")
doAssert "test2".match(peg"""{(!$ .)*} $""")
doAssert "test1".match(peg"""{@}$""")
doAssert "test2".match(peg"""{(!$ .)*} $""")
pegsTest()
static:
pegsTest()