Add interpreting event parser proc to pegs module. (#8075)

* Added simple interpreting event parser to pegs module.
* Has side-effects problem.
* Macro solution works.
* First flat callback test works.
* Fixed namespace pollution.
* Added handler for pkChar.
* Replaced event parser test.
* Started extensive docs.
* 'callback' to 'handler' renaming part 1.
* Renaming 'callback' to 'handler' part 2, completed comments.
* Fixed exported API pollution.
* Added more event handler hooks, fixed comments.
* Changed event parser addition entry.
* Fixed variable names and comments.
* Enhanced comment.
* Leave handlers are not called for an unsuccessful match.
* The three varieties of back-reference matches are processed in separate of-clauses now.
* Improved hygiene and (almost) eliminated exports.
* Trying to fix CI test breakage by eliminating export.
* Trying to fix CI test breakage by eliminating exports.
* Re-activated leave handler code execution for unsuccessful matches.
* Eliminated the last export statement (with a funny smelling hack).
* Make sure leave handler code is executed for all unsuccessful matcher cases.
* Replaced local unicode.`==` with export.
2026-02-12 14:23:45 +00:00 · 2018-08-24 20:13:37 +02:00
parent d5e1d102df
commit f26ed1d540
3 changed files with 636 additions and 257 deletions
--- a/changelog.md
+++ b/changelog.md
@@ -107,6 +107,7 @@
 - ``parseOct`` and ``parseBin`` in parseutils now also support the ``maxLen`` argument similar to ``parseHexInt``.
 - Added the proc ``flush`` for memory mapped files.
 - Added the ``MemMapFileStream``.
+- Added a simple interpreting event parser template ``eventParser`` to the ``pegs`` module.
 - Added ``macros.copyLineInfo`` to copy lineInfo from other node.
 - Added ``system.ashr`` an arithmetic right shift for integers.

--- a/lib/pure/pegs.nim
+++ b/lib/pure/pegs.nim
@@ -20,11 +20,11 @@ include "system/inclrtl"
 const
  useUnicode = true ## change this to deactivate proper UTF-8 support

-import
-  strutils
+import strutils, macros

 when useUnicode:
  import unicode
+  export unicode.`==`

 const
  InlineThreshold = 5  ## number of leaves; -1 to disable inlining
@@ -74,7 +74,7 @@ type
    line: int                     ## line the symbol has been declared/used in
    col: int                      ## column the symbol has been declared/used in
    flags: set[NonTerminalFlag]   ## the nonterminal's flags
-    rule: Peg                   ## the rule that the symbol refers to
+    rule: Peg                     ## the rule that the symbol refers to
  Peg* {.shallow.} = object ## type that represents a PEG
    case kind: PegKind
    of pkEmpty..pkWhitespace: nil
@@ -86,25 +86,59 @@ type
    else: sons: seq[Peg]
  NonTerminal* = ref NonTerminalObj

-proc name*(nt: NonTerminal): string = nt.name
-proc line*(nt: NonTerminal): int = nt.line
-proc col*(nt: NonTerminal): int = nt.col
-proc flags*(nt: NonTerminal): set[NonTerminalFlag] = nt.flags
-proc rule*(nt: NonTerminal): Peg = nt.rule
-
 proc kind*(p: Peg): PegKind = p.kind
+  ## Returns the *PegKind* of a given *Peg* object.
+
 proc term*(p: Peg): string = p.term
+  ## Returns the terminal string stored in the *term* field of a given *Peg*
+  ## variant object, where present.
+
 proc ch*(p: Peg): char = p.ch
+  ## Returns the character stored in the *ch* field of a given *Peg* variant
+  ## object, where present.
+
 proc charChoice*(p: Peg): ref set[char] = p.charChoice
+  ## Returns the *charChoice* field of a given *Peg* variant object 
+  ## where present.
+
 proc nt*(p: Peg): NonTerminal = p.nt
+  ## Returns the *NonTerminal* object of a given *Peg* variant object 
+  ## where present.
+
 proc index*(p: Peg): range[0..MaxSubpatterns] = p.index
+  ## Returns the back-reference index of a captured sub-pattern in the
+  ## *Captures* object for a given *Peg* variant object where present.
+
 iterator items*(p: Peg): Peg {.inline.} =
+  ## Yields the child nodes of a *Peg* variant object where present.
  for s in p.sons:
    yield s
+
 iterator pairs*(p: Peg): (int, Peg) {.inline.} =
+  ## Yields the indices and child nodes of a *Peg* variant object where present.
  for i in 0 ..< p.sons.len:
    yield (i, p.sons[i])

+proc name*(nt: NonTerminal): string = nt.name
+  ## Returns the name of the symbol that the given *NonTerminal*
+  ## represents.
+
+proc line*(nt: NonTerminal): int = nt.line
+  ## Returns the line in which the symbol of the given *NonTerminal* has
+  ## been declared/used.
+
+proc col*(nt: NonTerminal): int = nt.col
+  ## Returns the column in which the symbol of the given *NonTerminal* has
+  ## been declared/used.
+
+proc flags*(nt: NonTerminal): set[NonTerminalFlag] = nt.flags
+  ## Returns the set of *NonTerminalFlag* values stored in the *flags* field
+  ## of the given *NonTerminal*.
+
+proc rule*(nt: NonTerminal): Peg = nt.rule
+  ## Returns the *Peg* object representing the rule that the symbol of the
+  ## given *NonTerminal* refers to.
+
 proc term*(t: string): Peg {.nosideEffect, rtl, extern: "npegs$1Str".} =
  ## constructs a PEG from a terminal string
  if t.len != 1:
@@ -540,223 +574,497 @@ when not useUnicode:
  proc isTitle(a: char): bool {.inline.} = return false
  proc isWhiteSpace(a: char): bool {.inline.} = return a in {' ', '\9'..'\13'}

-proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
-               nosideEffect, rtl, extern: "npegs$1".} =
+template matchOrParse(mopProc: untyped): typed =
+  # Generates the matcher proc *mopProc*; used for *rawMatch* and for event
+  # parser procs. *enter* and *leave* must be resolvable where this template
+  # is expanded; *rawMatch* supplies do-nothing versions that just *discard*.
+
+  proc mopProc(s: string, p: Peg, start: int, c: var Captures): int =
+    proc matchBackRef(s: string, p: Peg, start: int, c: var Captures): int =
+      # Parse handler code must run in an *of* clause of its own for each
+      # *PegKind*, so we encapsulate the identical clause body for
+      # *pkBackRef..pkBackRefIgnoreStyle* here.
+      if p.index >= c.ml: return -1
+      var (a, b) = c.matches[p.index]
+      var n: Peg
+      n.kind = succ(pkTerminal, ord(p.kind)-ord(pkBackRef)) # same offset
+      n.term = s.substr(a, b)
+      mopProc(s, n, start, c)
+
+    case p.kind
+    of pkEmpty:
+      enter(pkEmpty, s, p, start)
+      result = 0 # match of length 0
+      leave(pkEmpty, s, p, start, result)
+    of pkAny:
+      enter(pkAny, s, p, start)
+      if start < s.len: result = 1
+      else: result = -1
+      leave(pkAny, s, p, start, result)
+    of pkAnyRune:
+      enter(pkAnyRune, s, p, start)
+      if start < s.len:
+        result = runeLenAt(s, start)
+      else:
+        result = -1
+      leave(pkAnyRune, s, p, start, result)
+    of pkLetter:
+      enter(pkLetter, s, p, start)
+      if start < s.len:
+        var a: Rune
+        result = start
+        fastRuneAt(s, result, a)
+        if isAlpha(a): dec(result, start)
+        else: result = -1
+      else:
+        result = -1
+      leave(pkLetter, s, p, start, result)
+    of pkLower:
+      enter(pkLower, s, p, start)
+      if start < s.len:
+        var a: Rune
+        result = start
+        fastRuneAt(s, result, a)
+        if isLower(a): dec(result, start)
+        else: result = -1
+      else:
+        result = -1
+      leave(pkLower, s, p, start, result)
+    of pkUpper:
+      enter(pkUpper, s, p, start)
+      if start < s.len:
+        var a: Rune
+        result = start
+        fastRuneAt(s, result, a)
+        if isUpper(a): dec(result, start)
+        else: result = -1
+      else:
+        result = -1
+      leave(pkUpper, s, p, start, result)
+    of pkTitle:
+      enter(pkTitle, s, p, start)
+      if start < s.len:
+        var a: Rune
+        result = start
+        fastRuneAt(s, result, a)
+        if isTitle(a): dec(result, start)
+        else: result = -1
+      else:
+        result = -1
+      leave(pkTitle, s, p, start, result)
+    of pkWhitespace:
+      enter(pkWhitespace, s, p, start)
+      if start < s.len:
+        var a: Rune
+        result = start
+        fastRuneAt(s, result, a)
+        if isWhiteSpace(a): dec(result, start)
+        else: result = -1
+      else:
+        result = -1
+      leave(pkWhitespace, s, p, start, result)
+    of pkGreedyAny:
+      enter(pkGreedyAny, s, p, start)
+      result = len(s) - start
+      leave(pkGreedyAny, s, p, start, result)
+    of pkNewLine:
+      enter(pkNewLine, s, p, start)
+      if start < s.len and s[start] == '\L': result = 1
+      elif start < s.len and s[start] == '\C':
+        if start+1 < s.len and s[start+1] == '\L': result = 2
+        else: result = 1
+      else: result = -1
+      leave(pkNewLine, s, p, start, result)
+    of pkTerminal:
+      enter(pkTerminal, s, p, start)
+      result = len(p.term)
+      for i in 0..result-1:
+        if start+i >= s.len or p.term[i] != s[start+i]:
+          result = -1
+          break
+      leave(pkTerminal, s, p, start, result)
+    of pkTerminalIgnoreCase:
+      enter(pkTerminalIgnoreCase, s, p, start)
+      var
+        i = 0
+        a, b: Rune
+      result = start
+      while i < len(p.term):
+        if result >= s.len:
+          result = -1
+          break
+        fastRuneAt(p.term, i, a)
+        fastRuneAt(s, result, b)
+        if toLower(a) != toLower(b):
+          result = -1
+          break
+      if result >= 0: dec(result, start) # on failure keep -1, not -1-start
+      leave(pkTerminalIgnoreCase, s, p, start, result)
+    of pkTerminalIgnoreStyle:
+      enter(pkTerminalIgnoreStyle, s, p, start)
+      var
+        i = 0
+        a, b: Rune
+      result = start
+      while i < len(p.term):
+        while i < len(p.term):
+          fastRuneAt(p.term, i, a)
+          if a != Rune('_'): break
+        while result < s.len:
+          fastRuneAt(s, result, b)
+          if b != Rune('_'): break
+        if result >= s.len:
+          if i >= p.term.len: break
+          else:
+            result = -1
+            break
+        elif toLower(a) != toLower(b):
+          result = -1
+          break
+      if result >= 0: dec(result, start) # on failure keep -1, not -1-start
+      leave(pkTerminalIgnoreStyle, s, p, start, result)
+    of pkChar:
+      enter(pkChar, s, p, start)
+      if start < s.len and p.ch == s[start]: result = 1
+      else: result = -1
+      leave(pkChar, s, p, start, result)
+    of pkCharChoice:
+      enter(pkCharChoice, s, p, start)
+      if start < s.len and contains(p.charChoice[], s[start]): result = 1
+      else: result = -1
+      leave(pkCharChoice, s, p, start, result)
+    of pkNonTerminal:
+      enter(pkNonTerminal, s, p, start)
+      var oldMl = c.ml
+      when false: echo "enter: ", p.nt.name
+      result = mopProc(s, p.nt.rule, start, c)
+      when false: echo "leave: ", p.nt.name
+      if result < 0: c.ml = oldMl
+      leave(pkNonTerminal, s, p, start, result)
+    of pkSequence:
+      enter(pkSequence, s, p, start)
+      var oldMl = c.ml
+      result = 0
+      for i in 0..high(p.sons):
+        var x = mopProc(s, p.sons[i], start+result, c)
+        if x < 0:
+          c.ml = oldMl
+          result = -1
+          break
+        else: inc(result, x)
+      leave(pkSequence, s, p, start, result)
+    of pkOrderedChoice:
+      enter(pkOrderedChoice, s, p, start)
+      var oldMl = c.ml
+      for i in 0..high(p.sons):
+        result = mopProc(s, p.sons[i], start, c)
+        if result >= 0: break
+        c.ml = oldMl
+      leave(pkOrderedChoice, s, p, start, result)
+    of pkSearch:
+      enter(pkSearch, s, p, start)
+      var oldMl = c.ml
+      result = 0
+      while start+result <= s.len:
+        var x = mopProc(s, p.sons[0], start+result, c)
+        if x >= 0:
+          inc(result, x)
+          leave(pkSearch, s, p, start, result)
+          return
+        inc(result)
+      result = -1
+      c.ml = oldMl
+      leave(pkSearch, s, p, start, result)
+    of pkCapturedSearch:
+      enter(pkCapturedSearch, s, p, start)
+      var idx = c.ml # reserve a slot for the subpattern
+      inc(c.ml)
+      result = 0
+      while start+result <= s.len:
+        var x = mopProc(s, p.sons[0], start+result, c)
+        if x >= 0:
+          if idx < MaxSubpatterns:
+            c.matches[idx] = (start, start+result-1)
+          #else: silently ignore the capture
+          inc(result, x)
+          leave(pkCapturedSearch, s, p, start, result)
+          return
+        inc(result)
+      result = -1
+      c.ml = idx
+      leave(pkCapturedSearch, s, p, start, result)
+    of pkGreedyRep:
+      enter(pkGreedyRep, s, p, start)
+      result = 0
+      while true:
+        var x = mopProc(s, p.sons[0], start+result, c)
+        # if x == 0, we have an endless loop; so the correct behaviour would be
+        # not to break. But endless loops can be easily introduced:
+        # ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the
+        # expected thing in this case.
+        if x <= 0: break
+        inc(result, x)
+      leave(pkGreedyRep, s, p, start, result)
+    of pkGreedyRepChar:
+      enter(pkGreedyRepChar, s, p, start)
+      result = 0
+      var ch = p.ch
+      while start+result < s.len and ch == s[start+result]: inc(result)
+      leave(pkGreedyRepChar, s, p, start, result)
+    of pkGreedyRepSet:
+      enter(pkGreedyRepSet, s, p, start)
+      result = 0
+      while start+result < s.len and contains(p.charChoice[], s[start+result]): inc(result)
+      leave(pkGreedyRepSet, s, p, start, result)
+    of pkOption:
+      enter(pkOption, s, p, start)
+      result = max(0, mopProc(s, p.sons[0], start, c))
+      leave(pkOption, s, p, start, result)
+    of pkAndPredicate:
+      enter(pkAndPredicate, s, p, start)
+      var oldMl = c.ml
+      result = mopProc(s, p.sons[0], start, c)
+      if result >= 0: result = 0 # do not consume anything
+      else: c.ml = oldMl
+      leave(pkAndPredicate, s, p, start, result)
+    of pkNotPredicate:
+      enter(pkNotPredicate, s, p, start)
+      var oldMl = c.ml
+      result = mopProc(s, p.sons[0], start, c)
+      if result < 0: result = 0
+      else:
+        c.ml = oldMl
+        result = -1
+      leave(pkNotPredicate, s, p, start, result)
+    of pkCapture:
+      enter(pkCapture, s, p, start)
+      var idx = c.ml # reserve a slot for the subpattern
+      inc(c.ml)
+      result = mopProc(s, p.sons[0], start, c)
+      if result >= 0:
+        if idx < MaxSubpatterns:
+          c.matches[idx] = (start, start+result-1)
+        #else: silently ignore the capture
+      else:
+        c.ml = idx
+      leave(pkCapture, s, p, start, result)
+    of pkBackRef:
+      enter(pkBackRef, s, p, start)
+      result = matchBackRef(s, p, start, c)
+      leave(pkBackRef, s, p, start, result)
+    of pkBackRefIgnoreCase:
+      enter(pkBackRefIgnoreCase, s, p, start)
+      result = matchBackRef(s, p, start, c)
+      leave(pkBackRefIgnoreCase, s, p, start, result)
+    of pkBackRefIgnoreStyle:
+      enter(pkBackRefIgnoreStyle, s, p, start)
+      result = matchBackRef(s, p, start, c)
+      leave(pkBackRefIgnoreStyle, s, p, start, result)
+    of pkStartAnchor:
+      enter(pkStartAnchor, s, p, start)
+      if c.origStart == start: result = 0
+      else: result = -1
+      leave(pkStartAnchor, s, p, start, result)
+    of pkRule, pkList: assert false
+
+proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int
+      {.noSideEffect, rtl, extern: "npegs$1".} =
  ## low-level matching proc that implements the PEG interpreter. Use this
  ## for maximum efficiency (every other PEG operation ends up calling this
  ## proc).
  ## Returns -1 if it does not match, else the length of the match
-  case p.kind
-  of pkEmpty: result = 0 # match of length 0
-  of pkAny:
-    if start < s.len: result = 1
-    else: result = -1
-  of pkAnyRune:
-    if start < s.len:
-      result = runeLenAt(s, start)
-    else:
-      result = -1
-  of pkLetter:
-    if start < s.len:
-      var a: Rune
-      result = start
-      fastRuneAt(s, result, a)
-      if isAlpha(a): dec(result, start)
-      else: result = -1
-    else:
-      result = -1
-  of pkLower:
-    if start < s.len:
-      var a: Rune
-      result = start
-      fastRuneAt(s, result, a)
-      if isLower(a): dec(result, start)
-      else: result = -1
-    else:
-      result = -1
-  of pkUpper:
-    if start < s.len:
-      var a: Rune
-      result = start
-      fastRuneAt(s, result, a)
-      if isUpper(a): dec(result, start)
-      else: result = -1
-    else:
-      result = -1
-  of pkTitle:
-    if start < s.len:
-      var a: Rune
-      result = start
-      fastRuneAt(s, result, a)
-      if isTitle(a): dec(result, start)
-      else: result = -1
-    else:
-      result = -1
-  of pkWhitespace:
-    if start < s.len:
-      var a: Rune
-      result = start
-      fastRuneAt(s, result, a)
-      if isWhiteSpace(a): dec(result, start)
-      else: result = -1
-    else:
-      result = -1
-  of pkGreedyAny:
-    result = len(s) - start
-  of pkNewLine:
-    if start < s.len and s[start] == '\L': result = 1
-    elif start < s.len and s[start] == '\C':
-      if start+1 < s.len and s[start+1] == '\L': result = 2
-      else: result = 1
-    else: result = -1
-  of pkTerminal:
-    result = len(p.term)
-    for i in 0..result-1:
-      if start+i >= s.len or p.term[i] != s[start+i]:
-        result = -1
-        break
-  of pkTerminalIgnoreCase:
-    var
-      i = 0
-      a, b: Rune
-    result = start
-    while i < len(p.term):
-      if result >= s.len:
-        result = -1
-        break
-      fastRuneAt(p.term, i, a)
-      fastRuneAt(s, result, b)
-      if toLower(a) != toLower(b):
-        result = -1
-        break
-    dec(result, start)
-  of pkTerminalIgnoreStyle:
-    var
-      i = 0
-      a, b: Rune
-    result = start
-    while i < len(p.term):
-      while i < len(p.term):
-        fastRuneAt(p.term, i, a)
-        if a != Rune('_'): break
-      while result < s.len:
-        fastRuneAt(s, result, b)
-        if b != Rune('_'): break
-      if result >= s.len:
-        if i >= p.term.len: break
+
+  # Set the handler generators to produce do-nothing handlers.
+  template enter(pk, s, p, start) =
+    discard
+  template leave(pk, s, p, start, length) =
+    discard
+  matchOrParse(matchIt)
+  result = matchIt(s, p, start, c)
+
+macro mkHandlerTplts(handlers: untyped): untyped =
+  # Transforms the handler spec in *handlers* into handler templates named
+  # *enterXX* / *leaveXX* (XX: kind name minus "pk"). AST of *handlers[0]*:
+  #
+  # .. code-block::
+  # StmtList
+  #   Call
+  #     Ident "pkNonTerminal"
+  #     StmtList
+  #       Call
+  #         Ident "enter"
+  #         StmtList
+  #           <handler code block>
+  #       Call
+  #         Ident "leave"
+  #         StmtList
+  #           <handler code block>
+  #   Call
+  #     Ident "pkChar"
+  #     StmtList
+  #       Call
+  #         Ident "leave"
+  #         StmtList
+  #           <handler code block>
+  #   ...
+  proc mkEnter(hdName, body: NimNode): NimNode = # exposes s, p, start to body
+    quote do:
+      template `hdName`(s, p, start) =
+        let s {.inject.} = s
+        let p {.inject.} = p
+        let start {.inject.} = start
+        `body`
+
+  template mkLeave(hdPostf, body) {.dirty.} =
+    # This has to be dirty to be able to capture *result* as *length* in
+    # *leaveXX* calls.
+    template `leave hdPostf`(s, p, start, length) =
+      body
+
+  result = newStmtList() # collects one template per handler found below
+  for topCall in handlers[0]:
+    if nnkCall != topCall.kind:
+      error("Call syntax expected.", topCall)
+    let pegKind = topCall[0]
+    if nnkIdent != pegKind.kind:
+      error("PegKind expected.", pegKind)
+    if 2 == topCall.len: # any other length yields no template at all
+      for hdDef in topCall[1]:
+        if nnkCall != hdDef.kind:
+          error("Call syntax expected.", hdDef)
+        if nnkIdent != hdDef[0].kind:
+          error("Handler identifier expected.", hdDef[0])
+        if 2 == hdDef.len: # any other length yields no template at all
+          let hdPostf = substr(pegKind.strVal, 2) # drop first 2 chars ("pk")
+          case hdDef[0].strVal
+          of "enter":
+            result.add mkEnter(newIdentNode("enter" & hdPostf), hdDef[1])
+          of "leave":
+            result.add getAst(mkLeave(ident(hdPostf), hdDef[1]))
+          else:
+            error(
+              "Unsupported handler identifier, expected 'enter' or 'leave'.",
+              hdDef[0]
+            )
+
+template eventParser*(pegAst, handlers: untyped): (proc(s: string): int) =
+  ## Generates an interpreting event parser *proc* according to the specified
+  ## PEG AST and handler code blocks. The *proc* can be called with a string
+  ## to be parsed and will execute the handler code blocks whenever their
+  ## associated grammar element is matched. It returns -1 if the string does not
+  ## match, else the length of the total match. The following example code
+  ## evaluates an arithmetic expression defined by a simple PEG:
+  ##
+  ## .. code-block:: nim
+  ##  import strutils, pegs
+  ##
+  ##  let
+  ##    pegAst = """
+  ##  Expr    <- Sum
+  ##  Sum     <- Product (('+' / '-')Product)*
+  ##  Product <- Value (('*' / '/')Value)*
+  ##  Value   <- [0-9]+ / '(' Expr ')'
+  ##    """.peg
+  ##    txt = "(5+3)/2-7*22"
+  ##
+  ##  var
+  ##    pStack: seq[string] = @[]
+  ##    valStack: seq[float] = @[]
+  ##    opStack = ""
+  ##  let
+  ##    parseArithExpr = pegAst.eventParser:
+  ##      pkNonTerminal:
+  ##        enter:
+  ##          pStack.add p.nt.name
+  ##        leave:
+  ##          pStack.setLen pStack.high
+  ##          if length > 0:
+  ##            let matchStr = s.substr(start, start+length-1)
+  ##            case p.nt.name
+  ##            of "Value":
+  ##              try:
+  ##                valStack.add matchStr.parseFloat
+  ##                echo valStack
+  ##              except ValueError:
+  ##                discard
+  ##            of "Sum", "Product":
+  ##              try:
+  ##                let val = matchStr.parseFloat
+  ##              except ValueError:
+  ##                if valStack.len > 1 and opStack.len > 0:
+  ##                  valStack[^2] = case opStack[^1]
+  ##                  of '+': valStack[^2] + valStack[^1]
+  ##                  of '-': valStack[^2] - valStack[^1]
+  ##                  of '*': valStack[^2] * valStack[^1]
+  ##                  else: valStack[^2] / valStack[^1]
+  ##                  valStack.setLen valStack.high
+  ##                  echo valStack
+  ##                  opStack.setLen opStack.high
+  ##                  echo opStack
+  ##      pkChar:
+  ##        leave:
+  ##          if length == 1 and "Value" != pStack[^1]:
+  ##            let matchChar = s[start]
+  ##            opStack.add matchChar
+  ##            echo opStack
+  ##
+  ##  let pLen = parseArithExpr(txt)
+  ## 
+  ## The *handlers* parameter consists of code blocks for *PegKinds*,
+  ## which define the grammar elements of interest. Each block can contain
+  ## handler code to be executed when the parser enters and leaves text
+  ## matching the grammar element. An *enter* handler can access the specific
+  ## PEG AST node being matched as *p*, the entire parsed string as *s*
+  ## and the position of the matched text segment in *s* as *start*. A *leave*
+  ## handler can access *p*, *s*, *start* and also the length of the matched
+  ## text segment as *length*. For an unsuccessful match, the *enter* and
+  ## *leave* handlers will be executed, with *length* set to -1.
+  ##
+  ## Symbols declared in an *enter* handler can be made visible in the
+  ## corresponding *leave* handler by annotating them with an *inject* pragma.
+  proc rawParse(s: string, p: Peg, start: int, c: var Captures): int
+      {.genSym.} =
+
+    # binding from *macros*
+    bind strVal
+
+    mkHandlerTplts:
+      handlers
+
+    macro enter(pegKind, s, pegNode, start: untyped): untyped =
+      # This is called by the matcher code in *matchOrParse* at the
+      # start of the code for a grammar element of kind *pegKind*.
+      # Expands to a call to the handler template if one was generated
+      # by *mkHandlerTplts*.
+      template mkDoEnter(hdPostf, s, pegNode, start) =
+        when declared(`enter hdPostf`):
+          `enter hdPostf`(s, pegNode, start):
        else:
-          result = -1
-          break
-      elif toLower(a) != toLower(b):
-        result = -1
-        break
-    dec(result, start)
-  of pkChar:
-    if start < s.len and p.ch == s[start]: result = 1
-    else: result = -1
-  of pkCharChoice:
-    if start < s.len and contains(p.charChoice[], s[start]): result = 1
-    else: result = -1
-  of pkNonTerminal:
-    var oldMl = c.ml
-    when false: echo "enter: ", p.nt.name
-    result = rawMatch(s, p.nt.rule, start, c)
-    when false: echo "leave: ", p.nt.name
-    if result < 0: c.ml = oldMl
-  of pkSequence:
-    var oldMl = c.ml
-    result = 0
-    for i in 0..high(p.sons):
-      var x = rawMatch(s, p.sons[i], start+result, c)
-      if x < 0:
-        c.ml = oldMl
-        result = -1
-        break
-      else: inc(result, x)
-  of pkOrderedChoice:
-    var oldMl = c.ml
-    for i in 0..high(p.sons):
-      result = rawMatch(s, p.sons[i], start, c)
-      if result >= 0: break
-      c.ml = oldMl
-  of pkSearch:
-    var oldMl = c.ml
-    result = 0
-    while start+result <= s.len:
-      var x = rawMatch(s, p.sons[0], start+result, c)
-      if x >= 0:
-        inc(result, x)
-        return
-      inc(result)
-    result = -1
-    c.ml = oldMl
-  of pkCapturedSearch:
-    var idx = c.ml # reserve a slot for the subpattern
-    inc(c.ml)
-    result = 0
-    while start+result <= s.len:
-      var x = rawMatch(s, p.sons[0], start+result, c)
-      if x >= 0:
-        if idx < MaxSubpatterns:
-          c.matches[idx] = (start, start+result-1)
-        #else: silently ignore the capture
-        inc(result, x)
-        return
-      inc(result)
-    result = -1
-    c.ml = idx
-  of pkGreedyRep:
-    result = 0
-    while true:
-      var x = rawMatch(s, p.sons[0], start+result, c)
-      # if x == 0, we have an endless loop; so the correct behaviour would be
-      # not to break. But endless loops can be easily introduced:
-      # ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the
-      # expected thing in this case.
-      if x <= 0: break
-      inc(result, x)
-  of pkGreedyRepChar:
-    result = 0
-    var ch = p.ch
-    while start+result < s.len and ch == s[start+result]: inc(result)
-  of pkGreedyRepSet:
-    result = 0
-    while start+result < s.len and contains(p.charChoice[], s[start+result]): inc(result)
-  of pkOption:
-    result = max(0, rawMatch(s, p.sons[0], start, c))
-  of pkAndPredicate:
-    var oldMl = c.ml
-    result = rawMatch(s, p.sons[0], start, c)
-    if result >= 0: result = 0 # do not consume anything
-    else: c.ml = oldMl
-  of pkNotPredicate:
-    var oldMl = c.ml
-    result = rawMatch(s, p.sons[0], start, c)
-    if result < 0: result = 0
-    else:
-      c.ml = oldMl
-      result = -1
-  of pkCapture:
-    var idx = c.ml # reserve a slot for the subpattern
-    inc(c.ml)
-    result = rawMatch(s, p.sons[0], start, c)
-    if result >= 0:
-      if idx < MaxSubpatterns:
-        c.matches[idx] = (start, start+result-1)
-      #else: silently ignore the capture
-    else:
-      c.ml = idx
-  of pkBackRef..pkBackRefIgnoreStyle:
-    if p.index >= c.ml: return -1
-    var (a, b) = c.matches[p.index]
-    var n: Peg
-    n.kind = succ(pkTerminal, ord(p.kind)-ord(pkBackRef))
-    n.term = s.substr(a, b)
-    result = rawMatch(s, n, start, c)
-  of pkStartAnchor:
-    if c.origStart == start: result = 0
-    else: result = -1
-  of pkRule, pkList: assert false
+          discard
+      let hdPostf = ident(substr(strVal(pegKind), 2))
+      getAst(mkDoEnter(hdPostf, s, pegNode, start))
+
+    macro leave(pegKind, s, pegNode, start, length: untyped): untyped =
+      # Like *enter*, but called at the end of the matcher code for
+      # a grammar element of kind *pegKind*.
+      template mkDoLeave(hdPostf, s, pegNode, start, length) =
+        when declared(`leave hdPostf`):
+          `leave hdPostf`(s, pegNode, start, length):
+        else:
+          discard
+      let hdPostf = ident(substr(strVal(pegKind), 2))
+      getAst(mkDoLeave(hdPostf, s, pegNode, start, length))
+
+    matchOrParse(parseIt)
+    parseIt(s, p, start, c)
+
+  proc parser(s: string): int {.genSym.} =
+    # the proc to be returned
+    var
+      ms: array[MaxSubpatterns, (int, int)]
+      cs = Captures(matches: ms, ml: 0, origStart: 0)
+    rawParse(s, pegAst, 0, cs)
+  parser

 template fillMatches(s, caps, c) =
  for k in 0..c.ml-1:
--- a/tests/stdlib/tpegs.nim
+++ b/tests/stdlib/tpegs.nim
@@ -1,5 +1,7 @@
 discard """
  output: '''
+PEG AST traversal output
+------------------------
 pkNonTerminal: Sum @(2, 3)
  pkSequence: (Product (('+' / '-') Product)*)
    pkNonTerminal: Product @(3, 7)
@@ -26,6 +28,25 @@ pkNonTerminal: Sum @(2, 3)
          pkChar: '+'
          pkChar: '-'
        pkNonTerminal: Product @(3, 7)
+
+Event parser output
+-------------------
+@[5.0]
+
+@[5.0, 3.0]
+@[8.0]
+
+/
+@[8.0, 2.0]
+@[4.0]
+
+-
+@[4.0, 7.0]
+-*
+@[4.0, 7.0, 22.0]
+@[4.0, 154.0]
+-
+@[-150.0]
 '''
 """

@@ -36,43 +57,92 @@ const
  indent = "  "

 let
-  pegSrc = """
-Expr <- Sum
-Sum <- Product (('+' / '-') Product)*
-Product <- Value (('*' / '/') Value)*
-Value <- [0-9]+ / '(' Expr ')'
-  """
-  pegAst: Peg = pegSrc.peg
+  pegAst = """
+Expr    <- Sum
+Sum     <- Product (('+' / '-')Product)*
+Product <- Value (('*' / '/')Value)*
+Value   <- [0-9]+ / '(' Expr ')'
+  """.peg
+  txt = "(5+3)/2-7*22"

-var
-  outp = newStringStream()
-  processed: seq[string] = @[]
+block:
+  var
+    outp = newStringStream()
+    processed: seq[string] = @[]

-proc prt(outp: Stream, kind: PegKind, s: string; level: int = 0) =
-  outp.writeLine indent.repeat(level) & "$1: $2" % [$kind, s]
+  proc prt(outp: Stream, kind: PegKind, s: string; level: int = 0) =
+    outp.writeLine indent.repeat(level) & "$1: $2" % [$kind, s]

-proc recLoop(p: Peg, level: int = 0) =
-  case p.kind
-  of pkEmpty..pkWhitespace:
-    discard
-  of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle:
-    outp.prt(p.kind, $p, level)
-  of pkChar, pkGreedyRepChar:
-    outp.prt(p.kind, $p, level)
-  of pkCharChoice, pkGreedyRepSet:
-    outp.prt(p.kind, $p, level)
-  of pkNonTerminal:
-    outp.prt(p.kind,
-      "$1 @($3, $4)" % [p.nt.name, $p.nt.rule.kind, $p.nt.line, $p.nt.col], level)
-    if not(p.nt.name in processed):
-      processed.add p.nt.name
-      p.nt.rule.recLoop level+1
-  of pkBackRef..pkBackRefIgnoreStyle:
-    outp.prt(p.kind, $p, level)
-  else:
-    outp.prt(p.kind, $p, level)
-    for s in items(p):
-      s.recLoop level+1
+  proc recLoop(p: Peg, level: int = 0) =
+    case p.kind
+    of pkEmpty..pkWhitespace:
+      discard
+    of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle:
+      outp.prt(p.kind, $p, level)
+    of pkChar, pkGreedyRepChar:
+      outp.prt(p.kind, $p, level)
+    of pkCharChoice, pkGreedyRepSet:
+      outp.prt(p.kind, $p, level)
+    of pkNonTerminal:
+      outp.prt(p.kind,
+        "$1 @($3, $4)" % [p.nt.name, $p.nt.rule.kind, $p.nt.line, $p.nt.col], level)
+      if not(p.nt.name in processed):
+        processed.add p.nt.name
+        p.nt.rule.recLoop level+1
+    of pkBackRef..pkBackRefIgnoreStyle:
+      outp.prt(p.kind, $p, level)
+    else:
+      outp.prt(p.kind, $p, level)
+      for s in items(p):
+        s.recLoop level+1

-pegAst.recLoop
-echo outp.data
+  pegAst.recLoop
+  echo "PEG AST traversal output"
+  echo "------------------------"
+  echo outp.data
+
+block:
+  var
+    pStack: seq[string] = @[]  # names of the non-terminals entered so far
+    valStack: seq[float] = @[] # operands and intermediate results
+    opStack = ""               # chars matched by pkChar outside of "Value"
+  let
+    parseArithExpr = pegAst.eventParser:
+      pkNonTerminal:
+        enter:
+          pStack.add p.nt.name
+        leave:
+          pStack.setLen pStack.high
+          if length > 0:
+            let matchStr = s.substr(start, start+length-1)
+            case p.nt.name
+            of "Value":
+              try:
+                valStack.add matchStr.parseFloat
+                echo valStack
+              except ValueError:
+                discard
+            of "Sum", "Product":
+              try:
+                discard matchStr.parseFloat # a lone number: nothing to reduce
+              except ValueError: # not a plain number: apply pending operator
+                if valStack.len > 1 and opStack.len > 0:
+                  valStack[^2] = case opStack[^1]
+                  of '+': valStack[^2] + valStack[^1]
+                  of '-': valStack[^2] - valStack[^1]
+                  of '*': valStack[^2] * valStack[^1]
+                  else: valStack[^2] / valStack[^1]
+                  valStack.setLen valStack.high
+                  echo valStack
+                  opStack.setLen opStack.high
+                  echo opStack
+      pkChar:
+        leave:
+          if length == 1 and "Value" != pStack[^1]:
+            let matchChar = s[start]
+            opStack.add matchChar
+            echo opStack
+  echo "Event parser output"
+  echo "-------------------"
+  let pLen = parseArithExpr(txt)
+  doAssert txt.len == pLen # the whole input must have been consumed