diff --git a/src/nre.nim b/src/nre.nim index f9c6b3509a..b601d82225 100644 --- a/src/nre.nim +++ b/src/nre.nim @@ -352,16 +352,16 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R if match.isNone: # either the end of the input or the string # cannot be split here - offset += 1 - if matchesCrLf and offset < (str.len - 1) and str[offset] == '\r' and str[offset + 1] == '\l': # if PCRE treats CrLf as newline, skip both at the same time - offset += 1 + offset += 2 elif unicode: # XXX what about invalid unicode? offset += str.runeLenAt(offset) assert(offset <= strlen) + else: + offset += 1 else: offset = match.get.matchBounds.b + 1 diff --git a/test/find.nim b/test/find.nim index 8ab70fb420..c138c2b28e 100644 --- a/test/find.nim +++ b/test/find.nim @@ -19,4 +19,6 @@ suite "find": test "len 0 find": check("".findAll(re"\ ") == newSeq[string]()) check("".findAll(re"") == @[""]) - check("word word".findAll(nre.re"\b") == @["", "", "", ""]) + check("word word".findAll(re"\b") == @["", "", "", ""]) + check("word\r\lword".findAll(re(r"$", "m")) == @["", ""]) + check("слово слово".findAll(re(r"\b", "uW")) == @["", "", "", ""]) diff --git a/test/misc.nim b/test/misc.nim index 8ec0ea8781..a515db889e 100644 --- a/test/misc.nim +++ b/test/misc.nim @@ -4,4 +4,3 @@ suite "Misc tests": test "unicode": check("".find(re("", "8")).match == "") check("перевірка".replace(re(r"\w", "uW"), "") == "") - diff --git a/test/split.nim b/test/split.nim index 0f4e8d24a8..fc2bbe1b47 100644 --- a/test/split.nim +++ b/test/split.nim @@ -21,6 +21,8 @@ suite "string splitting": check("12345".split(re("")) == @["1", "2", "3", "4", "5"]) check("".split(re"") == newSeq[string]()) check("word word".split(re"\b") == @["word", " ", "word"]) + check("word\r\lword".split(re(r"$", "m")) == @["word", "\r\lword"]) + check("слово слово".split(re(r"(\b)", "uW")) == @["", "слово", "", " ", "", "слово", ""]) test "perl split tests": check("forty-two" .split(re"") .join(",") == "f,o,r,t,y,-,t,w,o")