From 13e5a9ea6ca4eee1d2ff0e7fc9b3c1ceff7a31b4 Mon Sep 17 00:00:00 2001 From: Joseph Turner Date: Wed, 8 Apr 2015 16:35:55 +0100 Subject: [PATCH 1/3] Fixes matching error #2418 Fixes the split iterator. The main problem was with the incrementation of 'last'. Last was first incremented to the index of the first character after the match, but was then incremented again at the beginning of the while loop. This caused a problem if the character after the first match also matched the regular expression. --- lib/impure/re.nim | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/lib/impure/re.nim b/lib/impure/re.nim index c24734f89d..93dc4922d5 100644 --- a/lib/impure/re.nim +++ b/lib/impure/re.nim @@ -373,23 +373,26 @@ iterator split*(s: string, sep: Regex): string = ## Results in: ## ## .. code-block:: nim + ## "" ## "this" ## "is" ## "an" ## "example" + ## "" ## var - first = 0 - last = 0 + first = -1 + last = -1 while last < len(s): var x = matchLen(s, sep, last) if x > 0: inc(last, x) first = last + if x == 0: inc(last) while last < len(s): - inc(last) x = matchLen(s, sep, last) - if x > 0: break - if first < last: + if x >= 0: break + inc(last) + if first <= last: yield substr(s, first, last-1) proc split*(s: string, sep: Regex): seq[string] = From 7ee6c72b51e8e0877bc734a30c0498e42f0ca99a Mon Sep 17 00:00:00 2001 From: Joseph Turner Date: Wed, 8 Apr 2015 21:58:31 +0100 Subject: [PATCH 2/3] Adds test case to isMainModule block Adds the test case that originally caused the bug, issue #2418. 
--- lib/impure/re.nim | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/impure/re.nim b/lib/impure/re.nim index 93dc4922d5..ff2b70d2db 100644 --- a/lib/impure/re.nim +++ b/lib/impure/re.nim @@ -474,7 +474,12 @@ when isMainModule: var accum: seq[string] = @[] for word in split("00232this02939is39an22example111", re"\d+"): accum.add(word) - assert(accum == @["this", "is", "an", "example"]) + assert(accum == @["", "this", "is", "an", "example", ""]) + + accum = @[] + for word in split("AAA : : BBB", re"\s*:\s*"): + accum.add(word) + assert(accum == @["AAA", "", "BBB"]) for x in findAll("abcdef", re"^{.}", 3): assert x == "d" From 54d945c5123c2d6856216c4dc03abdf289e096da Mon Sep 17 00:00:00 2001 From: Joseph Turner Date: Thu, 9 Apr 2015 02:53:03 +0100 Subject: [PATCH 3/3] Adds note about change to news --- web/news.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/web/news.txt b/web/news.txt index 9fd7c31b35..af44f91a1f 100644 --- a/web/news.txt +++ b/web/news.txt @@ -73,7 +73,11 @@ News for ``expr`` and ``stmt``. The new names capture the semantics much better and most likely ``expr`` and ``stmt`` will be deprecated in favor of the new names. - + - The ``split`` method in module ``re`` has changed. It now handles the case + of matches having a length of 0, and empty strings being yielded from the + iterator. A notable change might be that a pattern being matched at the + beginning and end of a string, will result in an empty string being produced + at the start and the end of the iterator. Language Additions ------------------