Change endpos to inclusive

2026-02-15 23:54:19 +00:00 · 2015-04-09 23:49:26 +03:00
parent 7e44c08270
commit 2f0375c4c8
3 changed files with 11 additions and 10 deletions
--- a/README.asciidoc
+++ b/README.asciidoc
@@ -36,7 +36,7 @@ Finds the given pattern in the string between the end and start positions.
 `start` :: The start point at which to start matching. `|abc` is `0`; `a|bc`
   is `1`
 `endpos` :: The maximum index for a match; `int.high` means the end of the
-   string, otherwise it's an exclusive upper bound.
+   string, otherwise it's an inclusive upper bound.

 [[proc-match]]
 ==== match(string, Regex, start = 0, endpos = int.high): RegexMatch
--- a/src/nre.nim
+++ b/src/nre.nim
@@ -311,7 +311,7 @@ proc matchImpl(str: string, pattern: Regex, start, endpos: int, flags: int): Opt
  result.pcreMatchBounds = newSeq[Slice[cint]](ceil(vecsize / 2).int)
  result.pcreMatchBounds.setLen(vecsize div 3)

-  let strlen = if endpos == int.high: str.len else: endpos
+  let strlen = if endpos == int.high: str.len else: endpos+1

  let execRet = pcre.exec(pattern.pcreObj,
                          pattern.pcreExtra,
@@ -335,7 +335,7 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
  # see pcredemo for explaination
  let matchesCrLf = pattern.matchesCrLf()
  let unicode = (getinfo[cint](pattern, pcre.INFO_OPTIONS) and pcre.UTF8) > 0
-  let endpos = if endpos == int.high: str.len else: endpos
+  let strlen = if endpos == int.high: str.len else: endpos+1

  var offset = start
  var match: Option[RegexMatch]
@@ -361,13 +361,13 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
      elif unicode:
        # XXX what about invalid unicode?
        offset += str.runeLenAt(offset)
-        assert(offset <= endpos)
+        assert(offset <= strlen)
    else:
      offset = match.get.matchBounds.b + 1

      yield match.get

-    if offset >= endpos:
+    if offset >= strlen:
      # do while
      break

@@ -390,11 +390,11 @@ proc split*(str: string, pattern: Regex, maxSplit = -1, start = 0): seq[string]
  var bounds = 0 .. -1

  for match in str.findIter(pattern, start = start):
-    # upper bound is exclusive, lower is inclusive:
+    # bounds are inclusive:
    #
    # 0123456
    #  ^^^
-    # (1, 4)
+    # (1, 3)
    bounds = match.matchBounds

    # "12".split("") would be @["", "1", "2"], but
--- a/test/match.nim
+++ b/test/match.nim
@@ -1,9 +1,10 @@
 include nre, unittest, optional_t.nonstrict

 suite "match":
-  test "upper bound must be exclusive":
-    check("abc".match(re"abc", endpos = 0) == None[RegexMatch]())
-    check("abc".match(re"abc", endpos = 3) != None[RegexMatch]())
+  test "upper bound must be inclusive":
+    check("abc".match(re"abc", endpos = -1) == None[RegexMatch]())
+    check("abc".match(re"abc", endpos = 1) == None[RegexMatch]())
+    check("abc".match(re"abc", endpos = 2) != None[RegexMatch]())

  test "match examples":
    check("abc".match(re"(\w)").captures[0] == "a")