bugfixes: re; pegs module

This commit is contained in:
Andreas Rumpf
2010-02-28 18:14:27 +01:00
parent 230567588b
commit ddb7185482
13 changed files with 97 additions and 72 deletions

View File

@@ -1,5 +1,5 @@
# Configuration file for the Nimrod Compiler.
# (c) 2009 Andreas Rumpf
# (c) 2010 Andreas Rumpf
# Feel free to edit the default values as you need.

View File

@@ -55,7 +55,7 @@ String handling
* `unicode <unicode.html>`_
This module provides support to handle the Unicode UTF-8 encoding.
* `regexprs <regexprs.html>`_
* `re <re.html>`_
This module contains procedures and operators for handling regular
expressions. Consider using `pegs` instead.
@@ -229,6 +229,10 @@ Database support
* `db_mysql <db_mysql.html>`_
A higher level mySQL database wrapper. The same interface is implemented
for other databases too.
* `db_sqlite <db_sqlite.html>`_
A higher level SQLite database wrapper. The same interface is implemented
for other databases too.

View File

@@ -86,8 +86,8 @@ macro meaning
``\s`` any whitespace character: ``[ \9-\13]``
``\S`` any character that is not a whitespace character:
``[^ \9-\13]``
``\w`` any "word" character: ``[a-zA-Z_]``
``\W`` any "non-word" character: ``[^a-zA-Z_]``
``\w`` any "word" character: ``[a-zA-Z0-9_]``
``\W`` any "non-word" character: ``[^a-zA-Z0-9_]``
``\n`` any newline combination: ``\10 / \13\10 / \13``
``\i`` ignore case for matching; use this at the start of the PEG
``\y`` ignore style for matching; use this at the start of the PEG

View File

@@ -604,7 +604,7 @@ proc match*(s: string, pattern: TPeg, matches: var openarray[string],
## match, nothing is written into ``matches`` and ``false`` is
## returned.
var c: TMatchClosure
result = m(s, pattern, start, c) == len(s)
result = m(s, pattern, start, c) == len(s) -start
if result:
for i in 0..c.ml-1:
matches[i] = copy(s, c.matches[i][0], c.matches[i][1])
@@ -612,7 +612,7 @@ proc match*(s: string, pattern: TPeg, matches: var openarray[string],
proc match*(s: string, pattern: TPeg, start = 0): bool =
## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``.
var c: TMatchClosure
result = m(s, pattern, start, c) == len(s)
result = m(s, pattern, start, c) == len(s)-start
proc matchLen*(s: string, pattern: TPeg, matches: var openarray[string],
start = 0): int =
@@ -1263,8 +1263,8 @@ proc primary(p: var TPegParser): TPeg =
of "D": result = charset({'\1'..'\xff'} - {'0'..'9'})
of "s": result = charset({' ', '\9'..'\13'})
of "S": result = charset({'\1'..'\xff'} - {' ', '\9'..'\13'})
of "w": result = charset({'a'..'z', 'A'..'Z', '_'})
of "W": result = charset({'\1'..'\xff'} - {'a'..'z', 'A'..'Z', '_'})
of "w": result = charset({'a'..'z', 'A'..'Z', '_', '0'..'9'})
of "W": result = charset({'\1'..'\xff'} - {'a'..'z','A'..'Z','_','0'..'9'})
of "ident": result = pegs.ident
else: pegError(p, "unknown built-in: " & p.tok.literal)
getTok(p)
@@ -1435,3 +1435,11 @@ when isMainModule:
assert matches[0] == "a"
else:
assert false
var matches: array[0..5, string]
if match("abcdefg", peg"'c' {'d'} 'ef' {'g'}", matches, 2):
assert matches[0] == "d"
assert matches[1] == "g"
else:
assert false

View File

@@ -75,12 +75,14 @@ proc matchOrFind(s: string, pattern: TRegEx, matches: var openarray[string],
var b = rawMatches[i * 2 + 1]
if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1)
else: matches[i-1] = ""
return res
return rawMatches[1] - rawMatches[0]
proc matchOrFind(s: string, pattern: TRegEx, start, flags: cint): cint =
var rawMatches: array [0..maxSubpatterns * 3 - 1, cint]
return pcreExec(pattern.h, nil, s, len(s), start, flags,
cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)
result = pcreExec(pattern.h, nil, s, len(s), start, flags,
cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)
if result >= 0'i32:
result = rawMatches[1] - rawMatches[0]
proc match*(s: string, pattern: TRegEx, matches: var openarray[string],
start = 0): bool =
@@ -88,11 +90,12 @@ proc match*(s: string, pattern: TRegEx, matches: var openarray[string],
## the captured substrings in the array ``matches``. If it does not
## match, nothing is written into ``matches`` and ``false`` is
## returned.
return matchOrFind(s, pattern, matches, start, PCRE_ANCHORED) >= 0'i32
return matchOrFind(s, pattern, matches, start,
PCRE_ANCHORED) == cint(s.len - start)
proc match*(s: string, pattern: TRegEx, start = 0): bool =
## returns ``true`` if ``s[start..]`` matches the ``pattern``.
return matchOrFind(s, pattern, start, PCRE_ANCHORED) >= 0'i32
return matchOrFind(s, pattern, start, PCRE_ANCHORED) == cint(s.len - start)
proc matchLen*(s: string, pattern: TRegEx, matches: var openarray[string],
start = 0): int =
@@ -112,12 +115,23 @@ proc find*(s: string, pattern: TRegEx, matches: var openarray[string],
## returns the starting position of ``pattern`` in ``s`` and the captured
## substrings in the array ``matches``. If it does not match, nothing
## is written into ``matches`` and -1 is returned.
return matchOrFind(s, pattern, matches, start, 0'i32)
var
rawMatches: array[0..maxSubpatterns * 3 - 1, cint]
res = pcreExec(pattern.h, nil, s, len(s), start, 0'i32,
cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)
if res < 0'i32: return res
for i in 1..int(res)-1:
var a = rawMatches[i * 2]
var b = rawMatches[i * 2 + 1]
if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1)
else: matches[i-1] = ""
return rawMatches[0]
proc find*(s: string, pattern: TRegEx, start = 0): int =
## returns the starting position of ``pattern`` in ``s``. If it does not
## match, -1 is returned.
return matchOrFind(s, pattern, start, 0'i32)
var matches: array[0..maxSubpatterns-1, string]
result = find(s, pattern, matches, start)
template `=~` *(s: string, pattern: TRegEx): expr =
## This calls ``match`` with an implicit declared ``matches`` array that
@@ -279,57 +293,36 @@ const ## common regular expressions
## describes an URL
when isMainModule:
assert match("(a b c)", re"'(' @ ')'")
assert match("WHiLe", re(r"while", {reIgnoreCase}))
assert match("(a b c)", re"\( .* \)")
assert match("WHiLe", re("while", {reIgnoreCase}))
assert "0158787".match(re"\d+")
assert "ABC 0232".match(re"\w+\s+\d+")
assert "ABC".match(re"\d+ / \w+")
assert "ABC".match(re"\d+ | \w+")
assert matchLen("key", re(reIdentifier)) == 3
var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+"
assert matchLen("key1= cal9", pattern) == 11
assert find("_____abc_______", re"abc") == 5
var matches: array[0..5, string]
if match("abcdefg", re"c(d)ef(g)", matches, 2):
assert matches[0] == "d"
assert matches[1] == "g"
else:
assert false
if "abc" =~ re"(a)bcxyz|(\w+)":
assert matches[1] == "abc"
else:
assert false
assert "var1=key; var2=key2".endsWith(re"\w+=\w+")
assert("var1=key; var2=key2".replace(re"(\w+)=(\w+)", "$1<-$2$2") ==
"var1<-keykey; var2<-key2key2")
for word in split("00232this02939is39an22example111", re"\d+"):
writeln(stdout, word)
assert matchLen("key", re(reIdentifier)) == 3
var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+")
assert matchLen("key1= cal9", pattern) == 11
var c: TMatchClosure
var s = "a+b + c +d+e+f"
assert m(s, expr.rule, 0, c) == len(s)
var a = ""
for i in 0..c.ml-1:
a.add(copy(s, c.matches[i][0], c.matches[i][1]))
assert a == "abcdef"
#echo expr.rule
#const filename = "lib/devel/peg/grammar.txt"
#var grammar = parsePeg(newFileStream(filename, fmRead), filename)
#echo "a <- [abc]*?".match(grammar)
assert find("_____abc_______", term("abc")) == 5
assert match("_______ana", peg"A <- 'ana' / . A")
assert match("abcs%%%", peg"A <- ..A / .A / '%'")
if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}":
assert matches[0] == "abc"
else:
assert false
var g2 = peg"""S <- A B / C D
A <- 'a'+
B <- 'b'+
C <- 'c'+
D <- 'd'+
"""
assert($g2 == "((A B) / (C D))")
assert match("cccccdddddd", g2)
assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
"var1<-keykey; var2<-key2key2")
assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}")
if "aaaaaa" =~ peg"'aa' !. / ({'a'})+":
assert matches[0] == "a"
else:
assert false

View File

@@ -1044,10 +1044,20 @@ proc target*(PI: PProcessingInstruction): string =
# --Other stuff--
# Writer
proc addEscaped(s: string): string =
  ## Returns a copy of ``s`` in which the characters ``<``, ``>``, ``&`` and
  ## ``"`` are replaced by the entities ``&lt;``, ``&gt;``, ``&amp;`` and
  ## ``&quot;``. Every other character is copied through unchanged, so an
  ## empty input yields an empty result.
  result = ""
  for c in items(s):
    case c
    of '<': result.add("&lt;")
    of '>': result.add("&gt;")
    of '&': result.add("&amp;")   # must be escaped so existing text is not read as an entity
    of '"': result.add("&quot;")  # attribute values are emitted inside double quotes
    else: result.add(c)
proc nodeToXml(n: PNode, indent: int = 0): string =
result = repeatChar(indent, ' ') & "<" & n.nodeName
for i in items(n.Attributes):
result.add(" " & i.name & "=\"" & i.value & "\"")
result.add(" " & i.name & "=\"" & addEscaped(i.value) & "\"")
if n.childNodes.len() == 0:
result.add("/>") # No idea why this doesn't need a \n :O
@@ -1060,7 +1070,7 @@ proc nodeToXml(n: PNode, indent: int = 0): string =
result.add(nodeToXml(i, indent + 2))
of TextNode:
result.add(repeatChar(indent * 2, ' '))
result.add(i.nodeValue)
result.add(addEscaped(i.nodeValue))
of CDataSectionNode:
result.add(repeatChar(indent * 2, ' '))
result.add("<![CDATA[" & i.nodeValue & "]]>")
@@ -1080,4 +1090,4 @@ proc nodeToXml(n: PNode, indent: int = 0): string =
proc `$`*(doc: PDocument): string =
## Converts a PDocument object into a string representation of its XML
result = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
result.add(nodeToXml(doc.documentElement))
result.add(nodeToXml(doc.documentElement))

View File

@@ -165,4 +165,4 @@ when isMainModule:
echo(i.nodeName, "=", i.namespaceURI)
echo($xml)
echo($xml)

View File

@@ -223,9 +223,8 @@ proc newXmlTree*(tag: string, children: openArray[PXmlNode],
result.fAttr = attributes
proc xmlConstructor(e: PNimrodNode): PNimrodNode {.compileTime.} =
## use this procedure to define a new XML tag
expectLen(e, 1)
var a = e[0]
expectLen(e, 2)
var a = e[1]
if a.kind == nnkCall:
result = newCall("newXmlTree", toStrLit(a[0]))
var attrs = newCall("newStringTable", [])

View File

@@ -59,3 +59,4 @@ tstrutil.nim;ha/home/a1xyz/usr/bin
tvardecl.nim;44
tvarnums.nim;Success!
tvartup.nim;2 3
txmltree.nim;true
Can't render this file because it contains an unexpected character in line 57 and column 15.

View File

@@ -0,0 +1,7 @@
# Small check of XML node construction via the ``xmltree`` module.
import xmltree
# Build an ``a`` node with an ``href`` attribute and one string argument
# (presumably the element's text content -- verify against xmltree).
var x = <>a(href="nimrod.de", "www.nimrod-test.de")
# Print the result of comparing the node with the expected markup.
# NOTE(review): the expected literal has no closing ``</a>`` tag and ``x`` is
# compared without an explicit ``$`` conversion -- confirm this matches what
# xmltree actually produces.
echo x == "<a href=\"nimrod.de\">www.nimrod-test.de"

View File

@@ -19,7 +19,7 @@ Welcome to Nimrod
.. code-block:: nimrod
# Filter key=value pairs
import regexprs
import re
for x in lines("myfile.txt"):
if x =~ r"(\w+)=(.*)":
@@ -70,7 +70,7 @@ Nimrod is expressive
generics, etc.
* User-definable operators; code with new operators is often easier to read
than code which overloads built-in operators. In the code snippet, the
``=~`` operator is defined in the ``regexprs`` module.
``=~`` operator is defined in the ``re`` module.
* Macros can modify the abstract syntax tree at compile time.

View File

@@ -24,6 +24,7 @@ Bugfixes
- Fixed ``unicode.toUTF8``.
- The compiler now rejects ``'\n'``.
- ``times.getStartMilsecs()`` now works on Mac OS X.
- Fixed a bug in ``pegs.match`` concerning start offsets.
Additions
@@ -45,6 +46,7 @@ Additions
- Added ``xmltree`` module.
- Added ``xmlparser`` module.
- Added ``htmlparser`` module.
- Added ``re`` module.
- Many wrappers now do not contain redundant name prefixes (like ``GTK_``,
``lua``). The new wrappers are available in ``lib/newwrap``. Change
your configuration file to use these.
@@ -72,6 +74,7 @@ Changes affecting backwards compatibility
named arguments only, because the parameter order will change the next
version!
- ``atomic`` and ``let`` are now keywords.
- The ``\w`` character class for pegs now includes the digits ``'0'..'9'``.
2009-12-21 Version 0.8.6 released

View File

@@ -1,4 +1,4 @@
import strutils
echo "Give a list of integers (separated by spaces): ",
stdin.readLine.splitSeq.each(parseInt).max,
stdin.readLine.split.each(parseInt).max,
" is the maximum!"