pegs: captured search loop

This commit is contained in:
Araq
2010-11-07 23:52:41 +01:00
parent 7659739caf
commit 8ee63f9836
3 changed files with 55 additions and 5 deletions

View File

@@ -66,6 +66,10 @@ notation meaning
failure.
``@E`` Search: Shorthand for ``(!E .)* E``. (Search loop for the
pattern `E`.)
``{@} E`` Captured Search: Shorthand for ``{(!E .)*} E``. (Search
loop for the pattern `E`.) Everything until and excluding
`E` is captured.
``@@ E`` Same as ``{@} E``.
``A <- E`` Rule: Bind the expression `E` to the *nonterminal symbol*
`A`. **Left recursive rules are not possible and crash the
matching engine.**
@@ -131,7 +135,7 @@ The PEG parser implements this grammar (written in PEG syntax)::
rule <- identifier \s* "<-" expr ig
identNoArrow <- identifier !(\s* "<-")
prefixOpr <- ig '&' / ig '!' / ig '@'
prefixOpr <- ig '&' / ig '!' / ig '@' / ig '{@}' / ig '@@'
literal <- ig identifier? '$' [0-9]+
ig identNoArrow /
ig charset /

View File

@@ -58,6 +58,7 @@ type
pkBackRefIgnoreCase,
pkBackRefIgnoreStyle,
pkSearch, ## @a --> Internal DSL: @a
pkCapturedSearch, ## {@} a --> Internal DSL: @@a
pkRule, ## a <- b
pkList ## a, b
TNonTerminalFlag = enum
@@ -192,6 +193,11 @@ proc `@`*(a: TPeg): TPeg {.nosideEffect, rtl, extern: "npegsSearch".} =
## constructs a "search" for the PEG `a`
result.kind = pkSearch
result.sons = @[a]
proc `@@`*(a: TPeg): TPeg {.noSideEffect, rtl,
extern: "npgegsCapturedSearch".} =
## constructs a "captured search" for the PEG `a` (PEG syntax: ``{@} a``);
## the resulting node has kind `pkCapturedSearch` and `a` as its only son.
## NOTE(review): the extern name starts with "npgegs", whereas `@` uses
## "npegsSearch"; this looks like a typo -- confirm before renaming, since
## it changes the exported symbol.
result.kind = pkCapturedSearch
result.sons = @[a]
when false:
proc contains(a: TPeg, k: TPegKind): bool =
@@ -421,6 +427,9 @@ proc toStrAux(r: TPeg, res: var string) =
of pkSearch:
add(res, '@')
toStrAux(r.sons[0], res)
of pkCapturedSearch:
add(res, "{@}")
toStrAux(r.sons[0], res)
of pkCapture:
add(res, '{')
toStrAux(r.sons[0], res)
@@ -558,6 +567,21 @@ proc m(s: string, p: TPeg, start: int, c: var TMatchClosure): int =
inc(result)
result = -1
c.ml = oldMl
of pkCapturedSearch:
var idx = c.ml # reserve a slot for the subpattern
inc(c.ml)
result = 0
while start+result < s.len:
var x = m(s, p.sons[0], start+result, c)
if x >= 0:
if idx < maxSubpatterns:
c.matches[idx] = (start, start+result-1)
#else: silently ignore the capture
inc(result, x)
return
inc(result)
result = -1
c.ml = idx
of pkGreedyRep:
result = 0
while true:
@@ -850,6 +874,7 @@ type
tkParRi, ## ')'
tkCurlyLe, ## '{'
tkCurlyRi, ## '}'
tkCurlyAt, ## '{@}'
tkArrow, ## '<-'
tkBar, ## '/'
tkStar, ## '*'
@@ -880,7 +905,8 @@ type
const
tokKindToStr: array[TTokKind, string] = [
"invalid", "[EOF]", ".", "_", "identifier", "string literal",
"character set", "(", ")", "{", "}", "<-", "/", "*", "+", "&", "!", "?",
"character set", "(", ")", "{", "}", "{@}",
"<-", "/", "*", "+", "&", "!", "?",
"@", "built-in", "escaped", "$"
]
@@ -1112,9 +1138,14 @@ proc getTok(c: var TPegLexer, tok: var TToken) =
skip(c)
case c.buf[c.bufpos]
of '{':
tok.kind = tkCurlyLe
inc(c.bufpos)
add(tok.literal, '{')
if c.buf[c.bufpos] == '@' and c.buf[c.bufpos+1] == '}':
tok.kind = tkCurlyAt
inc(c.bufpos, 2)
add(tok.literal, "{@}")
else:
tok.kind = tkCurlyLe
add(tok.literal, '{')
of '}':
tok.kind = tkCurlyRi
inc(c.bufpos)
@@ -1193,6 +1224,10 @@ proc getTok(c: var TPegLexer, tok: var TToken) =
tok.kind = tkAt
inc(c.bufpos)
add(tok.literal, '@')
if c.buf[c.bufpos] == '@':
tok.kind = tkCurlyAt
inc(c.bufpos)
add(tok.literal, '@')
else:
add(tok.literal, c.buf[c.bufpos])
inc(c.bufpos)
@@ -1261,6 +1296,9 @@ proc primary(p: var TPegParser): TPeg =
of tkAt:
getTok(p)
return @primary(p)
of tkCurlyAt:
getTok(p)
return @@primary(p)
else: nil
case p.tok.kind
of tkIdentifier:
@@ -1346,7 +1384,7 @@ proc seqExpr(p: var TPegParser): TPeg =
while true:
case p.tok.kind
of tkAmp, tkNot, tkAt, tkStringLit, tkCharset, tkParLe, tkCurlyLe,
tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar:
tkAny, tkAnyRune, tkBuiltin, tkEscaped, tkDollar, tkCurlyAt:
result = sequence(result, primary(p))
of tkIdentifier:
if not arrowIsNextTok(p):
@@ -1514,4 +1552,11 @@ when isMainModule:
for x in findAll("abcdef", peg"{.}", 3):
echo x
if "f(a, b)" =~ peg"{[0-9]+} / ({\ident} '(' {@} ')')":
assert matches[0] == "f"
assert matches[1] == "a, b"
else:
assert false

View File

@@ -21,6 +21,7 @@ Additions
- Added ``re.findAll``, ``pegs.findAll``.
- Added ``os.findExe``.
- The Pegs module supports a *captured search loop operator* ``{@}``.
2010-10-20 Version 0.8.10 released