fixes #5444 - nre.findIter keeps searching when no match is possible (#5453)

This commit is contained in:
Florent
2017-03-02 11:48:41 +01:00
committed by Andreas Rumpf
parent 32159ee827
commit 34a3d40d18
2 changed files with 25 additions and 6 deletions

View File

@@ -516,23 +516,23 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
let unicode = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS) and
pcre.UTF8) > 0u32
let strlen = if endpos == int.high: str.len else: endpos+1
var offset = start
var match: Option[RegexMatch]
var neverMatched = true
while true:
var flags = 0
if match.isSome and
match.get.matchBounds.a > match.get.matchBounds.b:
# 0-len match
flags = pcre.NOTEMPTY_ATSTART
match = str.matchImpl(pattern, offset, endpos, flags)
if match.isNone:
# either the end of the input or the string
# cannot be split here
if offset >= strlen:
# cannot be split here - we also need to bail
# if we've never matched and we've already tried to...
if offset >= strlen or neverMatched:
break
if matchesCrLf and offset < (str.len - 1) and
@@ -546,11 +546,11 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
else:
offset += 1
else:
neverMatched = false
offset = match.get.matchBounds.b + 1
yield match.get
proc find*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
## Finds the given pattern in the string between the end and start
## positions.

View File

@@ -1,6 +1,7 @@
import unittest, sequtils
import nre except toSeq
import optional_nonstrict
import times, strutils
suite "find":
test "find text":
@@ -25,3 +26,21 @@ suite "find":
check("word word".findAll(re"\b") == @["", "", "", ""])
check("word\r\lword".findAll(re"(*ANYCRLF)(?m)$") == @["", ""])
check("слово слово".findAll(re"(*U)\b") == @["", "", "", ""])
test "bail early":
  ## Neither input is expected to yield a match (both checks compare the
  ## result of `findAll` with an empty seq). If the search gives up early,
  ## the CPU time spent on the large input should exceed the time spent on
  ## the small input by less than `tolerance`.
  const
    tolerance = 0.0001
    sampleLine = "url.sequence = \"http://whatever.com/jwhrejrhrjrhrjhrrjhrjrhrjrh\""
  let
    smallData = repeat(sampleLine, 10)
    largeData = repeat(sampleLine, 1000000)

  # NOTE(review): the pattern contains the literal text `&#39;`, which looks
  # like an HTML-escaped apostrophe - confirm whether `'` was intended.

  # The large input is measured first, then the small one.
  let largeBegin = cpuTime()
  check(largeData.findAll(re"url.*? = &#39;(.*?)&#39;") == newSeq[string]())
  let largeEnd = cpuTime()
  let elapsedLarge = largeEnd - largeBegin

  let smallBegin = cpuTime()
  check(smallData.findAll(re"url.*? = &#39;(.*?)&#39;") == newSeq[string]())
  let smallEnd = cpuTime()
  let elapsedSmall = smallEnd - smallBegin

  # Only the signed difference is checked, exactly as before: a small run
  # that happens to be slower than the large one also passes.
  let gap = elapsedLarge - elapsedSmall
  check(gap < tolerance)