From ee29370f6039df749f8b3a450b545ea220b6bd08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Montes?= Date: Mon, 13 Aug 2018 11:42:50 +0200 Subject: [PATCH] Fixed 7478: splitLines keepEol option (#8621) --- lib/pure/strutils.nim | 33 +++++++++++++++++++++------------ tests/stdlib/tstrutil.nim | 9 ++++++++- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index f8c5f9a916..be7ff60a21 100644 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -624,12 +624,13 @@ iterator rsplit*(s: string, sep: string, maxsplit: int = -1, ## Substrings are separated from the right by the string `sep` rsplitCommon(s, sep, maxsplit, sep.len) -iterator splitLines*(s: string): string = +iterator splitLines*(s: string, keepEol = false): string = ## Splits the string `s` into its containing lines. ## ## Every `character literal `_ newline ## combination (CR, LF, CR-LF) is supported. The result strings contain no - ## trailing ``\n``. + ## trailing end of line characters unless parameter ``keepEol`` is set to + ## ``true``. ## ## Example: ## @@ -649,22 +650,30 @@ iterator splitLines*(s: string): string = ## "" var first = 0 var last = 0 + var eolpos = 0 while true: while last < s.len and s[last] notin {'\c', '\l'}: inc(last) - yield substr(s, first, last-1) - # skip newlines: - if last >= s.len: break - if s[last] == '\l': inc(last) - elif s[last] == '\c': - inc(last) - if last < s.len and s[last] == '\l': inc(last) + + eolpos = last + if last < s.len: + if s[last] == '\l': inc(last) + elif s[last] == '\c': + inc(last) + if last < s.len and s[last] == '\l': inc(last) + + yield substr(s, first, if keepEol: last-1 else: eolpos-1) + + # no eol characters consumed means that the string is over + if eolpos == last: + break + first = last -proc splitLines*(s: string): seq[string] {.noSideEffect, +proc splitLines*(s: string, keepEol = false): seq[string] {.noSideEffect, rtl, extern: "nsuSplitLines".} = ## The same as the `splitLines <#splitLines.i,string>`_ iterator, but is a ## proc that returns a sequence of substrings. - accumulateResult(splitLines(s)) + accumulateResult(splitLines(s, keepEol=keepEol)) proc countLines*(s: string): int {.noSideEffect, rtl, extern: "nsuCountLines".} = @@ -908,7 +917,7 @@ proc parseOctInt*(s: string): int {.noSideEffect, ## `s` are ignored. let L = parseutils.parseOct(s, result, 0) if L != s.len or L == 0: - raise newException(ValueError, "invalid oct integer: " & s) + raise newException(ValueError, "invalid oct integer: " & s) proc parseHexInt*(s: string): int {.noSideEffect, procvar, rtl, extern: "nsuParseHexInt".} = diff --git a/tests/stdlib/tstrutil.nim b/tests/stdlib/tstrutil.nim index 4d4081d390..f0ee755f7c 100644 --- a/tests/stdlib/tstrutil.nim +++ b/tests/stdlib/tstrutil.nim @@ -199,6 +199,12 @@ proc testRFind = assert "0123456789ABCDEFGAH".rfind({'A'..'C'}, 13) == 12 assert "0123456789ABCDEFGAH".rfind({'G'..'H'}, 13) == -1 +proc testSplitLines() = + let fixture = "a\nb\rc\r\nd" + assert len(fixture.splitLines) == 4 + assert splitLines(fixture) == @["a", "b", "c", "d"] + assert splitLines(fixture, keepEol=true) == @["a\n", "b\r", "c\r\n", "d"] + proc testCountLines = proc assertCountLines(s: string) = assert s.countLines == s.splitLines.len assertCountLines("") @@ -229,7 +235,7 @@ proc testParseInts = assert "72".parseHexInt == 114 assert "FF".parseHexInt == 255 assert "ff".parseHexInt == 255 - assert "fF".parseHexInt == 255 + assert "fF".parseHexInt == 255 assert "0x7_2".parseHexInt == 114 rejectParse "".parseHexInt rejectParse "_".parseHexInt @@ -252,6 +258,7 @@ proc testParseInts = testDelete() testFind() testRFind() +testSplitLines() testCountLines() testParseInts()