diff --git a/README.asciidoc b/README.asciidoc index dfdc305fe8..26a9c66168 100644 --- a/README.asciidoc +++ b/README.asciidoc @@ -36,7 +36,7 @@ Finds the given pattern in the string between the end and start positions. `start` :: The start point at which to start matching. `|abc` is `0`; `a|bc` is `1` `endpos` :: The maximum index for a match; `int.high` means the end of the - string, otherwise it's an exclusive upper bound. + string, otherwise it's an inclusive upper bound. [[proc-match]] ==== match(string, Regex, start = 0, endpos = int.high): RegexMatch diff --git a/src/nre.nim b/src/nre.nim index a9bbc31d58..af5dab785f 100644 --- a/src/nre.nim +++ b/src/nre.nim @@ -311,7 +311,7 @@ proc matchImpl(str: string, pattern: Regex, start, endpos: int, flags: int): Opt result.pcreMatchBounds = newSeq[Slice[cint]](ceil(vecsize / 2).int) result.pcreMatchBounds.setLen(vecsize div 3) - let strlen = if endpos == int.high: str.len else: endpos + let strlen = if endpos == int.high: str.len else: endpos+1 let execRet = pcre.exec(pattern.pcreObj, pattern.pcreExtra, @@ -335,7 +335,7 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R # see pcredemo for explaination let matchesCrLf = pattern.matchesCrLf() let unicode = (getinfo[cint](pattern, pcre.INFO_OPTIONS) and pcre.UTF8) > 0 - let endpos = if endpos == int.high: str.len else: endpos + let strlen = if endpos == int.high: str.len else: endpos+1 var offset = start var match: Option[RegexMatch] @@ -361,13 +361,13 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R elif unicode: # XXX what about invalid unicode? offset += str.runeLenAt(offset) - assert(offset <= endpos) + assert(offset <= strlen) else: offset = match.get.matchBounds.b + 1 yield match.get - if offset >= endpos: + if offset >= strlen: # do while break @@ -390,11 +390,11 @@ proc split*(str: string, pattern: Regex, maxSplit = -1, start = 0): seq[string] var bounds = 0 .. -1 for match in str.findIter(pattern, start = start): - # upper bound is exclusive, lower is inclusive: + # bounds are inclusive: # # 0123456 # ^^^ - # (1, 4) + # (1, 3) bounds = match.matchBounds # "12".split("") would be @["", "1", "2"], but diff --git a/test/match.nim b/test/match.nim index 6cc76643cf..16fb931c0e 100644 --- a/test/match.nim +++ b/test/match.nim @@ -1,9 +1,10 @@ include nre, unittest, optional_t.nonstrict suite "match": - test "upper bound must be exclusive": - check("abc".match(re"abc", endpos = 0) == None[RegexMatch]()) - check("abc".match(re"abc", endpos = 3) != None[RegexMatch]()) + test "upper bound must be inclusive": + check("abc".match(re"abc", endpos = -1) == None[RegexMatch]()) + check("abc".match(re"abc", endpos = 1) == None[RegexMatch]()) + check("abc".match(re"abc", endpos = 2) != None[RegexMatch]()) test "match examples": check("abc".match(re"(\w)").captures[0] == "a")