mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-28 17:04:41 +00:00
Add interpreting event parser proc to pegs module. (#8075)
* Added simple interpreting event parser to pegs module. * Has side-effects problem. * Macro solution works. * First flat callback test works. * Fixed namespace pollution. * Added handler for pkChar. * Replaced event parser test. * Started extensive docs. * 'callback' to 'handler' renaming part 1. * Renaming 'callback' to 'handler' part2, completed comments. * Fixed exported API pollution. * Added more event handler hooks, fixed comments. * Changed event parser addition entry. * Fixed variable names and comments. * Enhanced comment. * Leave handlers are not called for an unsuccessful match. * The three varieties of back-reference matches are processed in separate of-clauses now. * Improved hygiene and (almost) eliminated exports. * Trying to fix CI test breakage by eliminating export. * Trying to fix CI test breakage by eliminating exports. * Re-activated leave handler code execution for unsuccessful matches. * Eliminated the last export statement (with a funny smelling hack). * Make sure leave handler code is executed for all unsuccessful matcher cases. * Replaced local unicode.`==` with export.
This commit is contained in:
@@ -107,6 +107,7 @@
|
||||
- ``parseOct`` and ``parseBin`` in parseutils now also support the ``maxLen`` argument similar to ``parseHexInt``.
|
||||
- Added the proc ``flush`` for memory mapped files.
|
||||
- Added the ``MemMapFileStream``.
|
||||
- Added a simple interpreting event parser template ``eventParser`` to the ``pegs`` module.
|
||||
- Added ``macros.copyLineInfo`` to copy lineInfo from other node.
|
||||
- Added ``system.ashr`` an arithmetic right shift for integers.
|
||||
|
||||
|
||||
@@ -20,11 +20,11 @@ include "system/inclrtl"
|
||||
const
|
||||
useUnicode = true ## change this to deactivate proper UTF-8 support
|
||||
|
||||
import
|
||||
strutils
|
||||
import strutils, macros
|
||||
|
||||
when useUnicode:
|
||||
import unicode
|
||||
export unicode.`==`
|
||||
|
||||
const
|
||||
InlineThreshold = 5 ## number of leaves; -1 to disable inlining
|
||||
@@ -74,7 +74,7 @@ type
|
||||
line: int ## line the symbol has been declared/used in
|
||||
col: int ## column the symbol has been declared/used in
|
||||
flags: set[NonTerminalFlag] ## the nonterminal's flags
|
||||
rule: Peg ## the rule that the symbol refers to
|
||||
rule: Peg ## the rule that the symbol refers to
|
||||
Peg* {.shallow.} = object ## type that represents a PEG
|
||||
case kind: PegKind
|
||||
of pkEmpty..pkWhitespace: nil
|
||||
@@ -86,25 +86,59 @@ type
|
||||
else: sons: seq[Peg]
|
||||
NonTerminal* = ref NonTerminalObj
|
||||
|
||||
proc name*(nt: NonTerminal): string = nt.name
|
||||
proc line*(nt: NonTerminal): int = nt.line
|
||||
proc col*(nt: NonTerminal): int = nt.col
|
||||
proc flags*(nt: NonTerminal): set[NonTerminalFlag] = nt.flags
|
||||
proc rule*(nt: NonTerminal): Peg = nt.rule
|
||||
|
||||
proc kind*(p: Peg): PegKind = p.kind
|
||||
## Returns the *PegKind* of a given *Peg* object.
|
||||
|
||||
proc term*(p: Peg): string = p.term
|
||||
## Returns the *string* representation of a given *Peg* variant object
|
||||
## where present.
|
||||
|
||||
proc ch*(p: Peg): char = p.ch
|
||||
## Returns the *char* representation of a given *Peg* variant object
|
||||
## where present.
|
||||
|
||||
proc charChoice*(p: Peg): ref set[char] = p.charChoice
|
||||
## Returns the *charChoice* field of a given *Peg* variant object
|
||||
## where present.
|
||||
|
||||
proc nt*(p: Peg): NonTerminal = p.nt
|
||||
## Returns the *NonTerminal* object of a given *Peg* variant object
|
||||
## where present.
|
||||
|
||||
proc index*(p: Peg): range[0..MaxSubpatterns] = p.index
|
||||
## Returns the back-reference index of a captured sub-pattern in the
|
||||
## *Captures* object for a given *Peg* variant object where present.
|
||||
|
||||
iterator items*(p: Peg): Peg {.inline.} =
|
||||
## Yields the child nodes of a *Peg* variant object where present.
|
||||
for s in p.sons:
|
||||
yield s
|
||||
|
||||
iterator pairs*(p: Peg): (int, Peg) {.inline.} =
|
||||
## Yields the indices and child nodes of a *Peg* variant object where present.
|
||||
for i in 0 ..< p.sons.len:
|
||||
yield (i, p.sons[i])
|
||||
|
||||
proc name*(nt: NonTerminal): string = nt.name
|
||||
## Gets the name of the symbol represented by the parent *Peg* object variant
|
||||
## of a given *NonTerminal*.
|
||||
|
||||
proc line*(nt: NonTerminal): int = nt.line
|
||||
## Gets the line number of the definition of the parent *Peg* object variant
|
||||
## of a given *NonTerminal*.
|
||||
|
||||
proc col*(nt: NonTerminal): int = nt.col
|
||||
## Gets the column number of the definition of the parent *Peg* object variant
|
||||
## of a given *NonTerminal*.
|
||||
|
||||
proc flags*(nt: NonTerminal): set[NonTerminalFlag] = nt.flags
|
||||
## Gets the *NonTerminalFlag*-typed flags field of the parent *Peg* variant
|
||||
## object of a given *NonTerminal*.
|
||||
|
||||
proc rule*(nt: NonTerminal): Peg = nt.rule
|
||||
## Gets the *Peg* object representing the rule definition of the parent *Peg*
|
||||
## object variant of a given *NonTerminal*.
|
||||
|
||||
proc term*(t: string): Peg {.nosideEffect, rtl, extern: "npegs$1Str".} =
|
||||
## constructs a PEG from a terminal string
|
||||
if t.len != 1:
|
||||
@@ -540,223 +574,497 @@ when not useUnicode:
|
||||
proc isTitle(a: char): bool {.inline.} = return false
|
||||
proc isWhiteSpace(a: char): bool {.inline.} = return a in {' ', '\9'..'\13'}
|
||||
|
||||
proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int {.
|
||||
nosideEffect, rtl, extern: "npegs$1".} =
|
||||
template matchOrParse(mopProc: untyped): typed =
|
||||
# Used to make the main matcher proc *rawMatch* as well as event parser
|
||||
# procs. For the former, *enter* and *leave* event handler code generators
|
||||
# are provided which just return *discard*.
|
||||
|
||||
proc mopProc(s: string, p: Peg, start: int, c: var Captures): int =
|
||||
proc matchBackRef(s: string, p: Peg, start: int, c: var Captures): int =
|
||||
# Parse handler code must run in an *of* clause of its own for each
|
||||
# *PegKind*, so we encapsulate the identical clause body for
|
||||
# *pkBackRef..pkBackRefIgnoreStyle* here.
|
||||
if p.index >= c.ml: return -1
|
||||
var (a, b) = c.matches[p.index]
|
||||
var n: Peg
|
||||
n.kind = succ(pkTerminal, ord(p.kind)-ord(pkBackRef))
|
||||
n.term = s.substr(a, b)
|
||||
mopProc(s, n, start, c)
|
||||
|
||||
case p.kind
|
||||
of pkEmpty:
|
||||
enter(pkEmpty, s, p, start)
|
||||
result = 0 # match of length 0
|
||||
leave(pkEmpty, s, p, start, result)
|
||||
of pkAny:
|
||||
enter(pkAny, s, p, start)
|
||||
if start < s.len: result = 1
|
||||
else: result = -1
|
||||
leave(pkAny, s, p, start, result)
|
||||
of pkAnyRune:
|
||||
enter(pkAnyRune, s, p, start)
|
||||
if start < s.len:
|
||||
result = runeLenAt(s, start)
|
||||
else:
|
||||
result = -1
|
||||
leave(pkAnyRune, s, p, start, result)
|
||||
of pkLetter:
|
||||
enter(pkLetter, s, p, start)
|
||||
if start < s.len:
|
||||
var a: Rune
|
||||
result = start
|
||||
fastRuneAt(s, result, a)
|
||||
if isAlpha(a): dec(result, start)
|
||||
else: result = -1
|
||||
else:
|
||||
result = -1
|
||||
leave(pkLetter, s, p, start, result)
|
||||
of pkLower:
|
||||
enter(pkLower, s, p, start)
|
||||
if start < s.len:
|
||||
var a: Rune
|
||||
result = start
|
||||
fastRuneAt(s, result, a)
|
||||
if isLower(a): dec(result, start)
|
||||
else: result = -1
|
||||
else:
|
||||
result = -1
|
||||
leave(pkLower, s, p, start, result)
|
||||
of pkUpper:
|
||||
enter(pkUpper, s, p, start)
|
||||
if start < s.len:
|
||||
var a: Rune
|
||||
result = start
|
||||
fastRuneAt(s, result, a)
|
||||
if isUpper(a): dec(result, start)
|
||||
else: result = -1
|
||||
else:
|
||||
result = -1
|
||||
leave(pkUpper, s, p, start, result)
|
||||
of pkTitle:
|
||||
enter(pkTitle, s, p, start)
|
||||
if start < s.len:
|
||||
var a: Rune
|
||||
result = start
|
||||
fastRuneAt(s, result, a)
|
||||
if isTitle(a): dec(result, start)
|
||||
else: result = -1
|
||||
else:
|
||||
result = -1
|
||||
leave(pkTitle, s, p, start, result)
|
||||
of pkWhitespace:
|
||||
enter(pkWhitespace, s, p, start)
|
||||
if start < s.len:
|
||||
var a: Rune
|
||||
result = start
|
||||
fastRuneAt(s, result, a)
|
||||
if isWhiteSpace(a): dec(result, start)
|
||||
else: result = -1
|
||||
else:
|
||||
result = -1
|
||||
leave(pkWhitespace, s, p, start, result)
|
||||
of pkGreedyAny:
|
||||
enter(pkGreedyAny, s, p, start)
|
||||
result = len(s) - start
|
||||
leave(pkGreedyAny, s, p, start, result)
|
||||
of pkNewLine:
|
||||
enter(pkNewLine, s, p, start)
|
||||
if start < s.len and s[start] == '\L': result = 1
|
||||
elif start < s.len and s[start] == '\C':
|
||||
if start+1 < s.len and s[start+1] == '\L': result = 2
|
||||
else: result = 1
|
||||
else: result = -1
|
||||
leave(pkNewLine, s, p, start, result)
|
||||
of pkTerminal:
|
||||
enter(pkTerminal, s, p, start)
|
||||
result = len(p.term)
|
||||
for i in 0..result-1:
|
||||
if start+i >= s.len or p.term[i] != s[start+i]:
|
||||
result = -1
|
||||
break
|
||||
leave(pkTerminal, s, p, start, result)
|
||||
of pkTerminalIgnoreCase:
|
||||
enter(pkTerminalIgnoreCase, s, p, start)
|
||||
var
|
||||
i = 0
|
||||
a, b: Rune
|
||||
result = start
|
||||
while i < len(p.term):
|
||||
if result >= s.len:
|
||||
result = -1
|
||||
break
|
||||
fastRuneAt(p.term, i, a)
|
||||
fastRuneAt(s, result, b)
|
||||
if toLower(a) != toLower(b):
|
||||
result = -1
|
||||
break
|
||||
dec(result, start)
|
||||
leave(pkTerminalIgnoreCase, s, p, start, result)
|
||||
of pkTerminalIgnoreStyle:
|
||||
enter(pkTerminalIgnoreStyle, s, p, start)
|
||||
var
|
||||
i = 0
|
||||
a, b: Rune
|
||||
result = start
|
||||
while i < len(p.term):
|
||||
while i < len(p.term):
|
||||
fastRuneAt(p.term, i, a)
|
||||
if a != Rune('_'): break
|
||||
while result < s.len:
|
||||
fastRuneAt(s, result, b)
|
||||
if b != Rune('_'): break
|
||||
if result >= s.len:
|
||||
if i >= p.term.len: break
|
||||
else:
|
||||
result = -1
|
||||
break
|
||||
elif toLower(a) != toLower(b):
|
||||
result = -1
|
||||
break
|
||||
dec(result, start)
|
||||
leave(pkTerminalIgnoreStyle, s, p, start, result)
|
||||
of pkChar:
|
||||
enter(pkChar, s, p, start)
|
||||
if start < s.len and p.ch == s[start]: result = 1
|
||||
else: result = -1
|
||||
leave(pkChar, s, p, start, result)
|
||||
of pkCharChoice:
|
||||
enter(pkCharChoice, s, p, start)
|
||||
if start < s.len and contains(p.charChoice[], s[start]): result = 1
|
||||
else: result = -1
|
||||
leave(pkCharChoice, s, p, start, result)
|
||||
of pkNonTerminal:
|
||||
enter(pkNonTerminal, s, p, start)
|
||||
var oldMl = c.ml
|
||||
when false: echo "enter: ", p.nt.name
|
||||
result = mopProc(s, p.nt.rule, start, c)
|
||||
when false: echo "leave: ", p.nt.name
|
||||
if result < 0: c.ml = oldMl
|
||||
leave(pkNonTerminal, s, p, start, result)
|
||||
of pkSequence:
|
||||
enter(pkSequence, s, p, start)
|
||||
var oldMl = c.ml
|
||||
result = 0
|
||||
for i in 0..high(p.sons):
|
||||
var x = mopProc(s, p.sons[i], start+result, c)
|
||||
if x < 0:
|
||||
c.ml = oldMl
|
||||
result = -1
|
||||
break
|
||||
else: inc(result, x)
|
||||
leave(pkSequence, s, p, start, result)
|
||||
of pkOrderedChoice:
|
||||
enter(pkOrderedChoice, s, p, start)
|
||||
var oldMl = c.ml
|
||||
for i in 0..high(p.sons):
|
||||
result = mopProc(s, p.sons[i], start, c)
|
||||
if result >= 0: break
|
||||
c.ml = oldMl
|
||||
leave(pkOrderedChoice, s, p, start, result)
|
||||
of pkSearch:
|
||||
enter(pkSearch, s, p, start)
|
||||
var oldMl = c.ml
|
||||
result = 0
|
||||
while start+result <= s.len:
|
||||
var x = mopProc(s, p.sons[0], start+result, c)
|
||||
if x >= 0:
|
||||
inc(result, x)
|
||||
leave(pkSearch, s, p, start, result)
|
||||
return
|
||||
inc(result)
|
||||
result = -1
|
||||
c.ml = oldMl
|
||||
leave(pkSearch, s, p, start, result)
|
||||
of pkCapturedSearch:
|
||||
enter(pkCapturedSearch, s, p, start)
|
||||
var idx = c.ml # reserve a slot for the subpattern
|
||||
inc(c.ml)
|
||||
result = 0
|
||||
while start+result <= s.len:
|
||||
var x = mopProc(s, p.sons[0], start+result, c)
|
||||
if x >= 0:
|
||||
if idx < MaxSubpatterns:
|
||||
c.matches[idx] = (start, start+result-1)
|
||||
#else: silently ignore the capture
|
||||
inc(result, x)
|
||||
leave(pkCapturedSearch, s, p, start, result)
|
||||
return
|
||||
inc(result)
|
||||
result = -1
|
||||
c.ml = idx
|
||||
leave(pkCapturedSearch, s, p, start, result)
|
||||
of pkGreedyRep:
|
||||
enter(pkGreedyRep, s, p, start)
|
||||
result = 0
|
||||
while true:
|
||||
var x = mopProc(s, p.sons[0], start+result, c)
|
||||
# if x == 0, we have an endless loop; so the correct behaviour would be
|
||||
# not to break. But endless loops can be easily introduced:
|
||||
# ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the
|
||||
# expected thing in this case.
|
||||
if x <= 0: break
|
||||
inc(result, x)
|
||||
leave(pkGreedyRep, s, p, start, result)
|
||||
of pkGreedyRepChar:
|
||||
enter(pkGreedyRepChar, s, p, start)
|
||||
result = 0
|
||||
var ch = p.ch
|
||||
while start+result < s.len and ch == s[start+result]: inc(result)
|
||||
leave(pkGreedyRepChar, s, p, start, result)
|
||||
of pkGreedyRepSet:
|
||||
enter(pkGreedyRepSet, s, p, start)
|
||||
result = 0
|
||||
while start+result < s.len and contains(p.charChoice[], s[start+result]): inc(result)
|
||||
leave(pkGreedyRepSet, s, p, start, result)
|
||||
of pkOption:
|
||||
enter(pkOption, s, p, start)
|
||||
result = max(0, mopProc(s, p.sons[0], start, c))
|
||||
leave(pkOption, s, p, start, result)
|
||||
of pkAndPredicate:
|
||||
enter(pkAndPredicate, s, p, start)
|
||||
var oldMl = c.ml
|
||||
result = mopProc(s, p.sons[0], start, c)
|
||||
if result >= 0: result = 0 # do not consume anything
|
||||
else: c.ml = oldMl
|
||||
leave(pkAndPredicate, s, p, start, result)
|
||||
of pkNotPredicate:
|
||||
enter(pkNotPredicate, s, p, start)
|
||||
var oldMl = c.ml
|
||||
result = mopProc(s, p.sons[0], start, c)
|
||||
if result < 0: result = 0
|
||||
else:
|
||||
c.ml = oldMl
|
||||
result = -1
|
||||
leave(pkNotPredicate, s, p, start, result)
|
||||
of pkCapture:
|
||||
enter(pkCapture, s, p, start)
|
||||
var idx = c.ml # reserve a slot for the subpattern
|
||||
inc(c.ml)
|
||||
result = mopProc(s, p.sons[0], start, c)
|
||||
if result >= 0:
|
||||
if idx < MaxSubpatterns:
|
||||
c.matches[idx] = (start, start+result-1)
|
||||
#else: silently ignore the capture
|
||||
else:
|
||||
c.ml = idx
|
||||
leave(pkCapture, s, p, start, result)
|
||||
of pkBackRef:
|
||||
enter(pkBackRef, s, p, start)
|
||||
result = matchBackRef(s, p, start, c)
|
||||
leave(pkBackRef, s, p, start, result)
|
||||
of pkBackRefIgnoreCase:
|
||||
enter(pkBackRefIgnoreCase, s, p, start)
|
||||
result = matchBackRef(s, p, start, c)
|
||||
leave(pkBackRefIgnoreCase, s, p, start, result)
|
||||
of pkBackRefIgnoreStyle:
|
||||
enter(pkBackRefIgnoreStyle, s, p, start)
|
||||
result = matchBackRef(s, p, start, c)
|
||||
leave(pkBackRefIgnoreStyle, s, p, start, result)
|
||||
of pkStartAnchor:
|
||||
enter(pkStartAnchor, s, p, start)
|
||||
if c.origStart == start: result = 0
|
||||
else: result = -1
|
||||
leave(pkStartAnchor, s, p, start, result)
|
||||
of pkRule, pkList: assert false
|
||||
|
||||
proc rawMatch*(s: string, p: Peg, start: int, c: var Captures): int
|
||||
{.noSideEffect, rtl, extern: "npegs$1".} =
|
||||
## low-level matching proc that implements the PEG interpreter. Use this
|
||||
## for maximum efficiency (every other PEG operation ends up calling this
|
||||
## proc).
|
||||
## Returns -1 if it does not match, else the length of the match
|
||||
case p.kind
|
||||
of pkEmpty: result = 0 # match of length 0
|
||||
of pkAny:
|
||||
if start < s.len: result = 1
|
||||
else: result = -1
|
||||
of pkAnyRune:
|
||||
if start < s.len:
|
||||
result = runeLenAt(s, start)
|
||||
else:
|
||||
result = -1
|
||||
of pkLetter:
|
||||
if start < s.len:
|
||||
var a: Rune
|
||||
result = start
|
||||
fastRuneAt(s, result, a)
|
||||
if isAlpha(a): dec(result, start)
|
||||
else: result = -1
|
||||
else:
|
||||
result = -1
|
||||
of pkLower:
|
||||
if start < s.len:
|
||||
var a: Rune
|
||||
result = start
|
||||
fastRuneAt(s, result, a)
|
||||
if isLower(a): dec(result, start)
|
||||
else: result = -1
|
||||
else:
|
||||
result = -1
|
||||
of pkUpper:
|
||||
if start < s.len:
|
||||
var a: Rune
|
||||
result = start
|
||||
fastRuneAt(s, result, a)
|
||||
if isUpper(a): dec(result, start)
|
||||
else: result = -1
|
||||
else:
|
||||
result = -1
|
||||
of pkTitle:
|
||||
if start < s.len:
|
||||
var a: Rune
|
||||
result = start
|
||||
fastRuneAt(s, result, a)
|
||||
if isTitle(a): dec(result, start)
|
||||
else: result = -1
|
||||
else:
|
||||
result = -1
|
||||
of pkWhitespace:
|
||||
if start < s.len:
|
||||
var a: Rune
|
||||
result = start
|
||||
fastRuneAt(s, result, a)
|
||||
if isWhiteSpace(a): dec(result, start)
|
||||
else: result = -1
|
||||
else:
|
||||
result = -1
|
||||
of pkGreedyAny:
|
||||
result = len(s) - start
|
||||
of pkNewLine:
|
||||
if start < s.len and s[start] == '\L': result = 1
|
||||
elif start < s.len and s[start] == '\C':
|
||||
if start+1 < s.len and s[start+1] == '\L': result = 2
|
||||
else: result = 1
|
||||
else: result = -1
|
||||
of pkTerminal:
|
||||
result = len(p.term)
|
||||
for i in 0..result-1:
|
||||
if start+i >= s.len or p.term[i] != s[start+i]:
|
||||
result = -1
|
||||
break
|
||||
of pkTerminalIgnoreCase:
|
||||
var
|
||||
i = 0
|
||||
a, b: Rune
|
||||
result = start
|
||||
while i < len(p.term):
|
||||
if result >= s.len:
|
||||
result = -1
|
||||
break
|
||||
fastRuneAt(p.term, i, a)
|
||||
fastRuneAt(s, result, b)
|
||||
if toLower(a) != toLower(b):
|
||||
result = -1
|
||||
break
|
||||
dec(result, start)
|
||||
of pkTerminalIgnoreStyle:
|
||||
var
|
||||
i = 0
|
||||
a, b: Rune
|
||||
result = start
|
||||
while i < len(p.term):
|
||||
while i < len(p.term):
|
||||
fastRuneAt(p.term, i, a)
|
||||
if a != Rune('_'): break
|
||||
while result < s.len:
|
||||
fastRuneAt(s, result, b)
|
||||
if b != Rune('_'): break
|
||||
if result >= s.len:
|
||||
if i >= p.term.len: break
|
||||
|
||||
# Set the handler generators to produce do-nothing handlers.
|
||||
template enter(pk, s, p, start) =
|
||||
discard
|
||||
template leave(pk, s, p, start, length) =
|
||||
discard
|
||||
matchOrParse(matchIt)
|
||||
result = matchIt(s, p, start, c)
|
||||
|
||||
macro mkHandlerTplts(handlers: untyped): untyped =
|
||||
# Transforms the handler spec in *handlers* into handler templates.
|
||||
# The AST structure of *handlers[0]*:
|
||||
#
|
||||
# .. code-block::
|
||||
# StmtList
|
||||
# Call
|
||||
# Ident "pkNonTerminal"
|
||||
# StmtList
|
||||
# Call
|
||||
# Ident "enter"
|
||||
# StmtList
|
||||
# <handler code block>
|
||||
# Call
|
||||
# Ident "leave"
|
||||
# StmtList
|
||||
# <handler code block>
|
||||
# Call
|
||||
# Ident "pkChar"
|
||||
# StmtList
|
||||
# Call
|
||||
# Ident "leave"
|
||||
# StmtList
|
||||
# <handler code block>
|
||||
# ...
|
||||
proc mkEnter(hdName, body: NimNode): NimNode =
|
||||
quote do:
|
||||
template `hdName`(s, p, start) =
|
||||
let s {.inject.} = s
|
||||
let p {.inject.} = p
|
||||
let start {.inject.} = start
|
||||
`body`
|
||||
|
||||
template mkLeave(hdPostf, body) {.dirty.} =
|
||||
# this has to be dirty to be able to capture *result* as *length* in
|
||||
# *leaveXX* calls.
|
||||
template `leave hdPostf`(s, p, start, length) =
|
||||
body
|
||||
|
||||
result = newStmtList()
|
||||
for topCall in handlers[0]:
|
||||
if nnkCall != topCall.kind:
|
||||
error("Call syntax expected.", topCall)
|
||||
let pegKind = topCall[0]
|
||||
if nnkIdent != pegKind.kind:
|
||||
error("PegKind expected.", pegKind)
|
||||
if 2 == topCall.len:
|
||||
for hdDef in topCall[1]:
|
||||
if nnkCall != hdDef.kind:
|
||||
error("Call syntax expected.", hdDef)
|
||||
if nnkIdent != hdDef[0].kind:
|
||||
error("Handler identifier expected.", hdDef[0])
|
||||
if 2 == hdDef.len:
|
||||
let hdPostf = substr(pegKind.strVal, 2)
|
||||
case hdDef[0].strVal
|
||||
of "enter":
|
||||
result.add mkEnter(newIdentNode("enter" & hdPostf), hdDef[1])
|
||||
of "leave":
|
||||
result.add getAst(mkLeave(ident(hdPostf), hdDef[1]))
|
||||
else:
|
||||
error(
|
||||
"Unsupported handler identifier, expected 'enter' or 'leave'.",
|
||||
hdDef[0]
|
||||
)
|
||||
|
||||
template eventParser*(pegAst, handlers: untyped): (proc(s: string): int) =
|
||||
## Generates an interpreting event parser *proc* according to the specified
|
||||
## PEG AST and handler code blocks. The *proc* can be called with a string
|
||||
## to be parsed and will execute the handler code blocks whenever their
|
||||
## associated grammar element is matched. It returns -1 if the string does not
|
||||
## match, else the length of the total match. The following example code
|
||||
## evaluates an arithmetic expression defined by a simple PEG:
|
||||
##
|
||||
## .. code-block:: nim
|
||||
## import strutils, pegs
|
||||
##
|
||||
## let
|
||||
## pegAst = """
|
||||
## Expr <- Sum
|
||||
## Sum <- Product (('+' / '-')Product)*
|
||||
## Product <- Value (('*' / '/')Value)*
|
||||
## Value <- [0-9]+ / '(' Expr ')'
|
||||
## """.peg
|
||||
## txt = "(5+3)/2-7*22"
|
||||
##
|
||||
## var
|
||||
## pStack: seq[string] = @[]
|
||||
## valStack: seq[float] = @[]
|
||||
## opStack = ""
|
||||
## let
|
||||
## parseArithExpr = pegAst.eventParser:
|
||||
## pkNonTerminal:
|
||||
## enter:
|
||||
## pStack.add p.nt.name
|
||||
## leave:
|
||||
## pStack.setLen pStack.high
|
||||
## if length > 0:
|
||||
## let matchStr = s.substr(start, start+length-1)
|
||||
## case p.nt.name
|
||||
## of "Value":
|
||||
## try:
|
||||
## valStack.add matchStr.parseFloat
|
||||
## echo valStack
|
||||
## except ValueError:
|
||||
## discard
|
||||
## of "Sum", "Product":
|
||||
## try:
|
||||
## let val = matchStr.parseFloat
|
||||
## except ValueError:
|
||||
## if valStack.len > 1 and opStack.len > 0:
|
||||
## valStack[^2] = case opStack[^1]
|
||||
## of '+': valStack[^2] + valStack[^1]
|
||||
## of '-': valStack[^2] - valStack[^1]
|
||||
## of '*': valStack[^2] * valStack[^1]
|
||||
## else: valStack[^2] / valStack[^1]
|
||||
## valStack.setLen valStack.high
|
||||
## echo valStack
|
||||
## opStack.setLen opStack.high
|
||||
## echo opStack
|
||||
## pkChar:
|
||||
## leave:
|
||||
## if length == 1 and "Value" != pStack[^1]:
|
||||
## let matchChar = s[start]
|
||||
## opStack.add matchChar
|
||||
## echo opStack
|
||||
##
|
||||
## let pLen = parseArithExpr(txt)
|
||||
##
|
||||
## The *handlers* parameter consists of code blocks for *PegKinds*,
|
||||
## which define the grammar elements of interest. Each block can contain
|
||||
## handler code to be executed when the parser enters and leaves text
|
||||
## matching the grammar element. An *enter* handler can access the specific
|
||||
## PEG AST node being matched as *p*, the entire parsed string as *s*
|
||||
## and the position of the matched text segment in *s* as *start*. A *leave*
|
||||
## handler can access *p*, *s*, *start* and also the length of the matched
|
||||
## text segment as *length*. For an unsuccessful match, the *enter* and
|
||||
## *leave* handlers will be executed, with *length* set to -1.
|
||||
##
|
||||
## Symbols declared in an *enter* handler can be made visible in the
|
||||
## corresponding *leave* handler by annotating them with an *inject* pragma.
|
||||
proc rawParse(s: string, p: Peg, start: int, c: var Captures): int
|
||||
{.genSym.} =
|
||||
|
||||
# binding from *macros*
|
||||
bind strVal
|
||||
|
||||
mkHandlerTplts:
|
||||
handlers
|
||||
|
||||
macro enter(pegKind, s, pegNode, start: untyped): untyped =
|
||||
# This is called by the matcher code in *matchOrParse* at the
|
||||
# start of the code for a grammar element of kind *pegKind*.
|
||||
# Expands to a call to the handler template if one was generated
|
||||
# by *mkHandlerTplts*.
|
||||
template mkDoEnter(hdPostf, s, pegNode, start) =
|
||||
when declared(`enter hdPostf`):
|
||||
`enter hdPostf`(s, pegNode, start):
|
||||
else:
|
||||
result = -1
|
||||
break
|
||||
elif toLower(a) != toLower(b):
|
||||
result = -1
|
||||
break
|
||||
dec(result, start)
|
||||
of pkChar:
|
||||
if start < s.len and p.ch == s[start]: result = 1
|
||||
else: result = -1
|
||||
of pkCharChoice:
|
||||
if start < s.len and contains(p.charChoice[], s[start]): result = 1
|
||||
else: result = -1
|
||||
of pkNonTerminal:
|
||||
var oldMl = c.ml
|
||||
when false: echo "enter: ", p.nt.name
|
||||
result = rawMatch(s, p.nt.rule, start, c)
|
||||
when false: echo "leave: ", p.nt.name
|
||||
if result < 0: c.ml = oldMl
|
||||
of pkSequence:
|
||||
var oldMl = c.ml
|
||||
result = 0
|
||||
for i in 0..high(p.sons):
|
||||
var x = rawMatch(s, p.sons[i], start+result, c)
|
||||
if x < 0:
|
||||
c.ml = oldMl
|
||||
result = -1
|
||||
break
|
||||
else: inc(result, x)
|
||||
of pkOrderedChoice:
|
||||
var oldMl = c.ml
|
||||
for i in 0..high(p.sons):
|
||||
result = rawMatch(s, p.sons[i], start, c)
|
||||
if result >= 0: break
|
||||
c.ml = oldMl
|
||||
of pkSearch:
|
||||
var oldMl = c.ml
|
||||
result = 0
|
||||
while start+result <= s.len:
|
||||
var x = rawMatch(s, p.sons[0], start+result, c)
|
||||
if x >= 0:
|
||||
inc(result, x)
|
||||
return
|
||||
inc(result)
|
||||
result = -1
|
||||
c.ml = oldMl
|
||||
of pkCapturedSearch:
|
||||
var idx = c.ml # reserve a slot for the subpattern
|
||||
inc(c.ml)
|
||||
result = 0
|
||||
while start+result <= s.len:
|
||||
var x = rawMatch(s, p.sons[0], start+result, c)
|
||||
if x >= 0:
|
||||
if idx < MaxSubpatterns:
|
||||
c.matches[idx] = (start, start+result-1)
|
||||
#else: silently ignore the capture
|
||||
inc(result, x)
|
||||
return
|
||||
inc(result)
|
||||
result = -1
|
||||
c.ml = idx
|
||||
of pkGreedyRep:
|
||||
result = 0
|
||||
while true:
|
||||
var x = rawMatch(s, p.sons[0], start+result, c)
|
||||
# if x == 0, we have an endless loop; so the correct behaviour would be
|
||||
# not to break. But endless loops can be easily introduced:
|
||||
# ``(comment / \w*)*`` is such an example. Breaking for x == 0 does the
|
||||
# expected thing in this case.
|
||||
if x <= 0: break
|
||||
inc(result, x)
|
||||
of pkGreedyRepChar:
|
||||
result = 0
|
||||
var ch = p.ch
|
||||
while start+result < s.len and ch == s[start+result]: inc(result)
|
||||
of pkGreedyRepSet:
|
||||
result = 0
|
||||
while start+result < s.len and contains(p.charChoice[], s[start+result]): inc(result)
|
||||
of pkOption:
|
||||
result = max(0, rawMatch(s, p.sons[0], start, c))
|
||||
of pkAndPredicate:
|
||||
var oldMl = c.ml
|
||||
result = rawMatch(s, p.sons[0], start, c)
|
||||
if result >= 0: result = 0 # do not consume anything
|
||||
else: c.ml = oldMl
|
||||
of pkNotPredicate:
|
||||
var oldMl = c.ml
|
||||
result = rawMatch(s, p.sons[0], start, c)
|
||||
if result < 0: result = 0
|
||||
else:
|
||||
c.ml = oldMl
|
||||
result = -1
|
||||
of pkCapture:
|
||||
var idx = c.ml # reserve a slot for the subpattern
|
||||
inc(c.ml)
|
||||
result = rawMatch(s, p.sons[0], start, c)
|
||||
if result >= 0:
|
||||
if idx < MaxSubpatterns:
|
||||
c.matches[idx] = (start, start+result-1)
|
||||
#else: silently ignore the capture
|
||||
else:
|
||||
c.ml = idx
|
||||
of pkBackRef..pkBackRefIgnoreStyle:
|
||||
if p.index >= c.ml: return -1
|
||||
var (a, b) = c.matches[p.index]
|
||||
var n: Peg
|
||||
n.kind = succ(pkTerminal, ord(p.kind)-ord(pkBackRef))
|
||||
n.term = s.substr(a, b)
|
||||
result = rawMatch(s, n, start, c)
|
||||
of pkStartAnchor:
|
||||
if c.origStart == start: result = 0
|
||||
else: result = -1
|
||||
of pkRule, pkList: assert false
|
||||
discard
|
||||
let hdPostf = ident(substr(strVal(pegKind), 2))
|
||||
getAst(mkDoEnter(hdPostf, s, pegNode, start))
|
||||
|
||||
macro leave(pegKind, s, pegNode, start, length: untyped): untyped =
|
||||
# Like *enter*, but called at the end of the matcher code for
|
||||
# a grammar element of kind *pegKind*.
|
||||
template mkDoLeave(hdPostf, s, pegNode, start, length) =
|
||||
when declared(`leave hdPostf`):
|
||||
`leave hdPostf`(s, pegNode, start, length):
|
||||
else:
|
||||
discard
|
||||
let hdPostf = ident(substr(strVal(pegKind), 2))
|
||||
getAst(mkDoLeave(hdPostf, s, pegNode, start, length))
|
||||
|
||||
matchOrParse(parseIt)
|
||||
parseIt(s, p, start, c)
|
||||
|
||||
proc parser(s: string): int {.genSym.} =
|
||||
# the proc to be returned
|
||||
var
|
||||
ms: array[MaxSubpatterns, (int, int)]
|
||||
cs = Captures(matches: ms, ml: 0, origStart: 0)
|
||||
rawParse(s, pegAst, 0, cs)
|
||||
parser
|
||||
|
||||
template fillMatches(s, caps, c) =
|
||||
for k in 0..c.ml-1:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
discard """
|
||||
output: '''
|
||||
PEG AST traversal output
|
||||
------------------------
|
||||
pkNonTerminal: Sum @(2, 3)
|
||||
pkSequence: (Product (('+' / '-') Product)*)
|
||||
pkNonTerminal: Product @(3, 7)
|
||||
@@ -26,6 +28,25 @@ pkNonTerminal: Sum @(2, 3)
|
||||
pkChar: '+'
|
||||
pkChar: '-'
|
||||
pkNonTerminal: Product @(3, 7)
|
||||
|
||||
Event parser output
|
||||
-------------------
|
||||
@[5.0]
|
||||
+
|
||||
@[5.0, 3.0]
|
||||
@[8.0]
|
||||
|
||||
/
|
||||
@[8.0, 2.0]
|
||||
@[4.0]
|
||||
|
||||
-
|
||||
@[4.0, 7.0]
|
||||
-*
|
||||
@[4.0, 7.0, 22.0]
|
||||
@[4.0, 154.0]
|
||||
-
|
||||
@[-150.0]
|
||||
'''
|
||||
"""
|
||||
|
||||
@@ -36,43 +57,92 @@ const
|
||||
indent = " "
|
||||
|
||||
let
|
||||
pegSrc = """
|
||||
Expr <- Sum
|
||||
Sum <- Product (('+' / '-') Product)*
|
||||
Product <- Value (('*' / '/') Value)*
|
||||
Value <- [0-9]+ / '(' Expr ')'
|
||||
"""
|
||||
pegAst: Peg = pegSrc.peg
|
||||
pegAst = """
|
||||
Expr <- Sum
|
||||
Sum <- Product (('+' / '-')Product)*
|
||||
Product <- Value (('*' / '/')Value)*
|
||||
Value <- [0-9]+ / '(' Expr ')'
|
||||
""".peg
|
||||
txt = "(5+3)/2-7*22"
|
||||
|
||||
var
|
||||
outp = newStringStream()
|
||||
processed: seq[string] = @[]
|
||||
block:
|
||||
var
|
||||
outp = newStringStream()
|
||||
processed: seq[string] = @[]
|
||||
|
||||
proc prt(outp: Stream, kind: PegKind, s: string; level: int = 0) =
|
||||
outp.writeLine indent.repeat(level) & "$1: $2" % [$kind, s]
|
||||
proc prt(outp: Stream, kind: PegKind, s: string; level: int = 0) =
|
||||
outp.writeLine indent.repeat(level) & "$1: $2" % [$kind, s]
|
||||
|
||||
proc recLoop(p: Peg, level: int = 0) =
|
||||
case p.kind
|
||||
of pkEmpty..pkWhitespace:
|
||||
discard
|
||||
of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle:
|
||||
outp.prt(p.kind, $p, level)
|
||||
of pkChar, pkGreedyRepChar:
|
||||
outp.prt(p.kind, $p, level)
|
||||
of pkCharChoice, pkGreedyRepSet:
|
||||
outp.prt(p.kind, $p, level)
|
||||
of pkNonTerminal:
|
||||
outp.prt(p.kind,
|
||||
"$1 @($3, $4)" % [p.nt.name, $p.nt.rule.kind, $p.nt.line, $p.nt.col], level)
|
||||
if not(p.nt.name in processed):
|
||||
processed.add p.nt.name
|
||||
p.nt.rule.recLoop level+1
|
||||
of pkBackRef..pkBackRefIgnoreStyle:
|
||||
outp.prt(p.kind, $p, level)
|
||||
else:
|
||||
outp.prt(p.kind, $p, level)
|
||||
for s in items(p):
|
||||
s.recLoop level+1
|
||||
proc recLoop(p: Peg, level: int = 0) =
|
||||
case p.kind
|
||||
of pkEmpty..pkWhitespace:
|
||||
discard
|
||||
of pkTerminal, pkTerminalIgnoreCase, pkTerminalIgnoreStyle:
|
||||
outp.prt(p.kind, $p, level)
|
||||
of pkChar, pkGreedyRepChar:
|
||||
outp.prt(p.kind, $p, level)
|
||||
of pkCharChoice, pkGreedyRepSet:
|
||||
outp.prt(p.kind, $p, level)
|
||||
of pkNonTerminal:
|
||||
outp.prt(p.kind,
|
||||
"$1 @($3, $4)" % [p.nt.name, $p.nt.rule.kind, $p.nt.line, $p.nt.col], level)
|
||||
if not(p.nt.name in processed):
|
||||
processed.add p.nt.name
|
||||
p.nt.rule.recLoop level+1
|
||||
of pkBackRef..pkBackRefIgnoreStyle:
|
||||
outp.prt(p.kind, $p, level)
|
||||
else:
|
||||
outp.prt(p.kind, $p, level)
|
||||
for s in items(p):
|
||||
s.recLoop level+1
|
||||
|
||||
pegAst.recLoop
|
||||
echo outp.data
|
||||
pegAst.recLoop
|
||||
echo "PEG AST traversal output"
|
||||
echo "------------------------"
|
||||
echo outp.data
|
||||
|
||||
block:
|
||||
var
|
||||
pStack: seq[string] = @[]
|
||||
valStack: seq[float] = @[]
|
||||
opStack = ""
|
||||
let
|
||||
parseArithExpr = pegAst.eventParser:
|
||||
pkNonTerminal:
|
||||
enter:
|
||||
pStack.add p.nt.name
|
||||
leave:
|
||||
pStack.setLen pStack.high
|
||||
if length > 0:
|
||||
let matchStr = s.substr(start, start+length-1)
|
||||
case p.nt.name
|
||||
of "Value":
|
||||
try:
|
||||
valStack.add matchStr.parseFloat
|
||||
echo valStack
|
||||
except ValueError:
|
||||
discard
|
||||
of "Sum", "Product":
|
||||
try:
|
||||
let val = matchStr.parseFloat
|
||||
except ValueError:
|
||||
if valStack.len > 1 and opStack.len > 0:
|
||||
valStack[^2] = case opStack[^1]
|
||||
of '+': valStack[^2] + valStack[^1]
|
||||
of '-': valStack[^2] - valStack[^1]
|
||||
of '*': valStack[^2] * valStack[^1]
|
||||
else: valStack[^2] / valStack[^1]
|
||||
valStack.setLen valStack.high
|
||||
echo valStack
|
||||
opStack.setLen opStack.high
|
||||
echo opStack
|
||||
pkChar:
|
||||
leave:
|
||||
if length == 1 and "Value" != pStack[^1]:
|
||||
let matchChar = s[start]
|
||||
opStack.add matchChar
|
||||
echo opStack
|
||||
echo "Event parser output"
|
||||
echo "-------------------"
|
||||
let pLen = parseArithExpr(txt)
|
||||
assert txt.len == pLen
|
||||
|
||||
Reference in New Issue
Block a user