From ee29370f6039df749f8b3a450b545ea220b6bd08 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Iv=C3=A1n=20Montes?= <drslump@pollinimini.net>
Date: Mon, 13 Aug 2018 11:42:50 +0200
Subject: [PATCH] Fixed 7478: splitLines keepEol option (#8621)

---
 lib/pure/strutils.nim     | 33 +++++++++++++++++++++------------
 tests/stdlib/tstrutil.nim |  9 ++++++++-
 2 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim
index f8c5f9a916..be7ff60a21 100644
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -624,12 +624,13 @@ iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
   ## Substrings are separated from the right by the string `sep`
   rsplitCommon(s, sep, maxsplit, sep.len)
 
-iterator splitLines*(s: string): string =
+iterator splitLines*(s: string, keepEol = false): string =
   ## Splits the string `s` into its containing lines.
   ##
   ## Every `character literal <manual.html#character-literals>`_ newline
   ## combination (CR, LF, CR-LF) is supported. The result strings contain no
-  ## trailing ``\n``.
+  ## trailing end-of-line characters unless the ``keepEol`` parameter is set
+  ## to ``true``.
   ##
   ## Example:
   ##
@@ -649,22 +650,30 @@ iterator splitLines*(s: string): string =
   ##   ""
   var first = 0
   var last = 0
+  var eolpos = 0
   while true:
     while last < s.len and s[last] notin {'\c', '\l'}: inc(last)
-    yield substr(s, first, last-1)
-    # skip newlines:
-    if last >= s.len: break
-    if s[last] == '\l': inc(last)
-    elif s[last] == '\c':
-      inc(last)
-      if last < s.len and s[last] == '\l': inc(last)
+
+    eolpos = last
+    if last < s.len:
+      if s[last] == '\l': inc(last)
+      elif s[last] == '\c':
+        inc(last)
+        if last < s.len and s[last] == '\l': inc(last)
+
+    yield substr(s, first, if keepEol: last-1 else: eolpos-1)
+
+    # no EOL characters were consumed, so the end of the string was reached
+    if eolpos == last:
+      break
+
     first = last
 
-proc splitLines*(s: string): seq[string] {.noSideEffect,
+proc splitLines*(s: string, keepEol = false): seq[string] {.noSideEffect,
   rtl, extern: "nsuSplitLines".} =
   ## The same as the `splitLines <#splitLines.i,string>`_ iterator, but is a
   ## proc that returns a sequence of substrings.
-  accumulateResult(splitLines(s))
+  accumulateResult(splitLines(s, keepEol=keepEol))
 
 proc countLines*(s: string): int {.noSideEffect,
   rtl, extern: "nsuCountLines".} =
@@ -908,7 +917,7 @@ proc parseOctInt*(s: string): int {.noSideEffect,
   ## `s` are ignored.
   let L = parseutils.parseOct(s, result, 0)
   if L != s.len or L == 0:
-    raise newException(ValueError, "invalid oct integer: " & s)  
+    raise newException(ValueError, "invalid oct integer: " & s)
 
 proc parseHexInt*(s: string): int {.noSideEffect, procvar,
   rtl, extern: "nsuParseHexInt".} =
diff --git a/tests/stdlib/tstrutil.nim b/tests/stdlib/tstrutil.nim
index 4d4081d390..f0ee755f7c 100644
--- a/tests/stdlib/tstrutil.nim
+++ b/tests/stdlib/tstrutil.nim
@@ -199,6 +199,12 @@ proc testRFind =
   assert "0123456789ABCDEFGAH".rfind({'A'..'C'}, 13) == 12
   assert "0123456789ABCDEFGAH".rfind({'G'..'H'}, 13) == -1
 
+proc testSplitLines() =
+  let fixture = "a\nb\rc\r\nd"
+  assert len(fixture.splitLines) == 4
+  assert splitLines(fixture) == @["a", "b", "c", "d"]
+  assert splitLines(fixture, keepEol=true) == @["a\n", "b\r", "c\r\n", "d"]
+
 proc testCountLines =
   proc assertCountLines(s: string) = assert s.countLines == s.splitLines.len
   assertCountLines("")
@@ -229,7 +235,7 @@ proc testParseInts =
   assert "72".parseHexInt == 114
   assert "FF".parseHexInt == 255
   assert "ff".parseHexInt == 255
-  assert "fF".parseHexInt == 255  
+  assert "fF".parseHexInt == 255
   assert "0x7_2".parseHexInt == 114
   rejectParse "".parseHexInt
   rejectParse "_".parseHexInt
@@ -252,6 +258,7 @@ proc testParseInts =
 testDelete()
 testFind()
 testRFind()
+testSplitLines()
 testCountLines()
 testParseInts()