Mirror of https://github.com/nim-lang/Nim.git (synced 2026-01-01 19:02:18 +00:00)
Fix zero-length matches for multibyte characters
This commit is contained in:
@@ -352,16 +352,16 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
|
||||
if match.isNone:
|
||||
# either the end of the input or the string
|
||||
# cannot be split here
|
||||
offset += 1
|
||||
|
||||
if matchesCrLf and offset < (str.len - 1) and
|
||||
str[offset] == '\r' and str[offset + 1] == '\l':
|
||||
# if PCRE treats CrLf as newline, skip both at the same time
|
||||
offset += 1
|
||||
offset += 2
|
||||
elif unicode:
|
||||
# XXX what about invalid unicode?
|
||||
offset += str.runeLenAt(offset)
|
||||
assert(offset <= strlen)
|
||||
else:
|
||||
offset += 1
|
||||
else:
|
||||
offset = match.get.matchBounds.b + 1
|
||||
|
||||
|
||||
@@ -19,4 +19,6 @@ suite "find":
|
||||
test "len 0 find":
|
||||
check("".findAll(re"\ ") == newSeq[string]())
|
||||
check("".findAll(re"") == @[""])
|
||||
check("word word".findAll(nre.re"\b") == @["", "", "", ""])
|
||||
check("word word".findAll(re"\b") == @["", "", "", ""])
|
||||
check("word\r\lword".findAll(re(r"$", "m<anycrlf>")) == @["", ""])
|
||||
check("слово слово".findAll(re(r"\b", "uW")) == @["", "", "", ""])
|
||||
|
||||
@@ -4,4 +4,3 @@ suite "Misc tests":
|
||||
test "unicode":
|
||||
check("".find(re("", "8")).match == "")
|
||||
check("перевірка".replace(re(r"\w", "uW"), "") == "")
|
||||
|
||||
|
||||
@@ -21,6 +21,8 @@ suite "string splitting":
|
||||
check("12345".split(re("")) == @["1", "2", "3", "4", "5"])
|
||||
check("".split(re"") == newSeq[string]())
|
||||
check("word word".split(re"\b") == @["word", " ", "word"])
|
||||
check("word\r\lword".split(re(r"$", "m<anycrlf>")) == @["word", "\r\lword"])
|
||||
check("слово слово".split(re(r"(\b)", "uW")) == @["", "слово", "", " ", "", "слово", ""])
|
||||
|
||||
test "perl split tests":
|
||||
check("forty-two" .split(re"") .join(",") == "f,o,r,t,y,-,t,w,o")
|
||||
|
||||
Reference in New Issue
Block a user