From 739a8ea06095871a92cc3ef9d35a7ca782390195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20J=C3=B6ud?= Date: Wed, 14 Oct 2015 13:44:20 +0200 Subject: [PATCH 1/3] added maxsplit argument to strutils.split --- lib/pure/strutils.nim | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index a78fed4b91..c9b23daf25 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -322,7 +322,7 @@ proc toOctal*(c: char): string {.noSideEffect, rtl, extern: "nsuToOctal".} = result[i] = chr(val mod 8 + ord('0')) val = val div 8 -iterator split*(s: string, seps: set[char] = Whitespace): string = +iterator split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a group of separators. ## ## Substrings are separated by a substring containing only `seps`. Note @@ -367,15 +367,20 @@ iterator split*(s: string, seps: set[char] = Whitespace): string = ## "08.398990" ## var last = 0 + var splits = maxsplit assert(not ('\0' in seps)) while last < len(s): while s[last] in seps: inc(last) var first = last while last < len(s) and s[last] notin seps: inc(last) # BUGFIX! if first <= last-1: + if splits == 0: + yield substr(s, first, len(s)-1) + break yield substr(s, first, last-1) + dec(splits) -iterator split*(s: string, sep: char): string = +iterator split*(s: string, sep: char, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a single separator. ## ## Substrings are separated by the character `sep`. @@ -402,26 +407,36 @@ iterator split*(s: string, sep: char): string = ## "" ## var last = 0 + var splits = maxsplit assert('\0' != sep) if len(s) > 0: # `<=` is correct here for the edge cases! while last <= len(s): var first = last while last < len(s) and s[last] != sep: inc(last) + if splits == 0: + yield substr(s, first, len(s)-1) + break yield substr(s, first, last-1) + dec(splits) inc(last) -iterator split*(s: string, sep: string): string = +iterator split*(s: string, sep: string, maxsplit: int = -1): string = ## Splits the string `s` into substrings using a string separator. ## ## Substrings are separated by the string `sep`. var last = 0 + var splits = maxsplit if len(s) > 0: while last <= len(s): var first = last while last < len(s) and s.substr(last, last + `_, but is a ## proc that returns a sequence of substrings. - accumulateResult(split(s, seps)) + accumulateResult(split(s, seps, maxsplit)) -proc split*(s: string, sep: char): seq[string] {.noSideEffect, +proc split*(s: string, sep: char, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nsuSplitChar".} = ## The same as the `split iterator <#split.i,string,char>`_, but is a proc ## that returns a sequence of substrings. - accumulateResult(split(s, sep)) + accumulateResult(split(s, sep, maxsplit)) -proc split*(s: string, sep: string): seq[string] {.noSideEffect, +proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nsuSplitString".} = ## Splits the string `s` into substrings using a string separator. ## ## Substrings are separated by the string `sep`. This is a wrapper around the ## `split iterator <#split.i,string,string>`_. - accumulateResult(split(s, sep)) + accumulateResult(split(s, sep, maxsplit)) proc toHex*(x: BiggestInt, len: Positive): string {.noSideEffect, rtl, extern: "nsuToHex".} = @@ -1660,7 +1675,7 @@ when isMainModule: doAssert isAlpha("Rasp") doAssert isAlpha("Args") doAssert(not isAlpha("$Tomato")) - + doAssert isAlphaNumeric('3') doAssert isAlphaNumeric('R') doAssert(not isAlphaNumeric('!')) From 4e8e5af934d7c44f5c6fe659b4dcca1e16cf964d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20J=C3=B6ud?= Date: Wed, 14 Oct 2015 14:00:51 +0200 Subject: [PATCH 2/3] added tests for strutils.split --- lib/pure/strutils.nim | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index c9b23daf25..516ca953b8 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -1719,3 +1719,9 @@ when isMainModule: doAssert isUpper("ABC") doAssert(not isUpper("AAcc")) doAssert(not isUpper("A#$")) + + let s = " this is an example " + doAssert s.split() == @["this", "is", "an", "example"] + doAssert s.split(maxsplit=4) == @["this", "is", "an", "example"] + doAssert s.split(' ', maxsplit=4) == @["", "this", "", "", "is an example "] + doAssert s.split(" ", maxsplit=4) == @["", "this", "", "", "is an example "] From 755d89e32d39eb08e85a98b421909a7535051c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20J=C3=B6ud?= Date: Wed, 14 Oct 2015 15:29:27 +0200 Subject: [PATCH 3/3] modified strutils.split --- lib/pure/strutils.nim | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 516ca953b8..6eb87d91bd 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -374,10 +374,9 @@ iterator split*(s: string, seps: set[char] = Whitespace, maxsplit: int = -1): st var first = last while last < len(s) and s[last] notin seps: inc(last) # BUGFIX! if first <= last-1: - if splits == 0: - yield substr(s, first, len(s)-1) - break + if splits == 0: last = len(s) yield substr(s, first, last-1) + if splits == 0: break dec(splits) iterator split*(s: string, sep: char, maxsplit: int = -1): string = @@ -414,10 +413,9 @@ iterator split*(s: string, sep: char, maxsplit: int = -1): string = while last <= len(s): var first = last while last < len(s) and s[last] != sep: inc(last) - if splits == 0: - yield substr(s, first, len(s)-1) - break + if splits == 0: last = len(s) yield substr(s, first, last-1) + if splits == 0: break dec(splits) inc(last) @@ -432,10 +430,9 @@ iterator split*(s: string, sep: string, maxsplit: int = -1): string = var first = last while last < len(s) and s.substr(last, last +