mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-18 13:30:33 +00:00
This commit is contained in:
@@ -516,23 +516,23 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
|
||||
let unicode = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS) and
|
||||
pcre.UTF8) > 0u32
|
||||
let strlen = if endpos == int.high: str.len else: endpos+1
|
||||
|
||||
var offset = start
|
||||
var match: Option[RegexMatch]
|
||||
var neverMatched = true
|
||||
|
||||
while true:
|
||||
var flags = 0
|
||||
|
||||
if match.isSome and
|
||||
match.get.matchBounds.a > match.get.matchBounds.b:
|
||||
# 0-len match
|
||||
flags = pcre.NOTEMPTY_ATSTART
|
||||
|
||||
match = str.matchImpl(pattern, offset, endpos, flags)
|
||||
|
||||
if match.isNone:
|
||||
# either the end of the input or the string
|
||||
# cannot be split here
|
||||
if offset >= strlen:
|
||||
# cannot be split here - we also need to bail
|
||||
# if we've never matched and we've already tried to...
|
||||
if offset >= strlen or neverMatched:
|
||||
break
|
||||
|
||||
if matchesCrLf and offset < (str.len - 1) and
|
||||
@@ -546,11 +546,11 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
|
||||
else:
|
||||
offset += 1
|
||||
else:
|
||||
neverMatched = false
|
||||
offset = match.get.matchBounds.b + 1
|
||||
|
||||
yield match.get
|
||||
|
||||
|
||||
proc find*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
|
||||
## Finds the given pattern in the string between the start and end
|
||||
## positions.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import unittest, sequtils
|
||||
import nre except toSeq
|
||||
import optional_nonstrict
|
||||
import times, strutils
|
||||
|
||||
suite "find":
|
||||
test "find text":
|
||||
@@ -25,3 +26,21 @@ suite "find":
|
||||
check("word word".findAll(re"\b") == @["", "", "", ""])
|
||||
check("word\r\lword".findAll(re"(*ANYCRLF)(?m)$") == @["", ""])
|
||||
check("слово слово".findAll(re"(*U)\b") == @["", "", "", ""])
|
||||
|
||||
test "bail early":
|
||||
## Regression test for bailing out early when the pattern never matches.
## The pattern below looks for single-quoted values while the generated data
## only contains double-quoted ones, so nothing is expected to be found.
|
||||
## The same search is timed on a small and on a large input; if the search
## bails out early, the timing difference between the two should be well
|
||||
## within the tolerance defined below.
|
||||
# Maximum accepted difference between the two cpuTime() measurements
# (presumably seconds - confirm against the std/times documentation).
const tolerance = 0.0001
|
||||
# Small input: the sample line repeated 10 times.
var smallData = repeat("url.sequence = \"http://whatever.com/jwhrejrhrjrhrjhrrjhrjrhrjrh\"", 10)
|
||||
# Large input: the same sample line repeated 1000000 times.
var largeData = repeat("url.sequence = \"http://whatever.com/jwhrejrhrjrhrjhrrjhrjrhrjrh\"", 1000000)
|
||||
# Time the search over the large input; the re"..." call sits inside the
# measured region as well.
var start = cpuTime()
|
||||
check(largeData.findAll(re"url.*? = '(.*?)'") == newSeq[string]())
|
||||
var stop = cpuTime()
|
||||
var elapsedLarge = stop - start
|
||||
# Time the identical search over the small input, reusing start/stop.
start = cpuTime()
|
||||
check(smallData.findAll(re"url.*? = '(.*?)'") == newSeq[string]())
|
||||
stop = cpuTime()
|
||||
var elapsedSmall = stop - start
|
||||
# NOTE(review): this is an absolute timing bound, so the result may depend on
# machine load - confirm it is stable on CI.
var difference = elapsedLarge - elapsedSmall
|
||||
check(difference < tolerance)
|
||||
|
||||
Reference in New Issue
Block a user