diff --git a/README.asciidoc b/README.asciidoc index d8cb6e0961..a0d5bdcdbd 100644 --- a/README.asciidoc +++ b/README.asciidoc @@ -53,10 +53,10 @@ Variants: - `proc findAll(...)` returns a `seq[string]` [[proc-split]] -==== split(string, Regex, maxsplit = -1): seq[string] +==== split(string, Regex, maxsplit = -1, start = 0): seq[string] Splits the string with the given regex. This works according to the rules that -Perl and Javascript use. +Perl and Javascript use: - If the match is zero-width, then the string is still split: `"123".split(r"") == @["1", "2", "3"]`. @@ -66,6 +66,8 @@ Perl and Javascript use. times. This means that there will be `maxsplit` strings in the output seq. `"1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"]` +`start` behaves the same as in link:#proc-find[`find(...)`]. + [[proc-replace]] ==== replace(string, Regex, sub): string diff --git a/src/nre.nim b/src/nre.nim index 47e374978b..6743a82dcb 100644 --- a/src/nre.nim +++ b/src/nre.nim @@ -384,13 +384,13 @@ proc renderBounds(str: string, bounds: Slice[int]): string = for i in bounds.a .. bounds.b: result.add("^") -proc split*(str: string, pattern: Regex, maxSplit = -1): seq[string] = +proc split*(str: string, pattern: Regex, maxSplit = -1, start = 0): seq[string] = result = @[] - var lastIdx = 0 + var lastIdx = start var splits = 0 var bounds: Slice[int] - for match in str.findIter(pattern): + for match in str.findIter(pattern, start = start): # upper bound is exclusive, lower is inclusive: # # 0123456 @@ -401,7 +401,7 @@ proc split*(str: string, pattern: Regex, maxSplit = -1): seq[string] = # "12".split("") would be @["", "1", "2"], but # if we skip an empty first match, it's the correct # @["1", "2"] - if bounds.a < bounds.b or bounds.a > 0: + if bounds.a < bounds.b or bounds.a > start: result.add(str.substr(lastIdx, bounds.a - 1)) splits += 1 diff --git a/test/split.nim b/test/split.nim index 7aed714a4d..62b88427b2 100644 --- a/test/split.nim +++ b/test/split.nim @@ -43,3 +43,8 @@ suite "string splitting": check("a.b" .split(re"\.") .join(",") == "a,b") check("" .split(re"") .len == 0) check(":" .split(re"") .len == 1) + + test "start position": + check("abc".split(re"", start = 1) == @["b", "c"]) + check("abc".split(re"", start = 2) == @["c"]) + check("abc".split(re"", start = 3) == newSeq[string]())