Fixed #7478: splitLines keepEol option (#8621)

2026-02-14 15:23:27 +00:00 · 2018-08-13 11:42:50 +02:00
parent e839c01f5b
commit ee29370f60
2 changed files with 29 additions and 13 deletions
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -624,12 +624,13 @@ iterator rsplit*(s: string, sep: string, maxsplit: int = -1,
  ## Substrings are separated from the right by the string `sep`
  rsplitCommon(s, sep, maxsplit, sep.len)

-iterator splitLines*(s: string): string =
+iterator splitLines*(s: string, keepEol = false): string =
  ## Splits the string `s` into its containing lines.
  ##
  ## Every `character literal <manual.html#character-literals>`_ newline
  ## combination (CR, LF, CR-LF) is supported. The result strings contain no
-  ## trailing ``\n``.
+  ## trailing end-of-line characters unless the ``keepEol`` parameter is set to
+  ## ``true``.
  ##
  ## Example:
  ##
@@ -649,22 +650,30 @@ iterator splitLines*(s: string): string =
  ##   ""
  var first = 0
  var last = 0
+  var eolpos = 0
  while true:
    while last < s.len and s[last] notin {'\c', '\l'}: inc(last)
-    yield substr(s, first, last-1)
-    # skip newlines:
-    if last >= s.len: break
-    if s[last] == '\l': inc(last)
-    elif s[last] == '\c':
-      inc(last)
-      if last < s.len and s[last] == '\l': inc(last)
+
+    eolpos = last
+    if last < s.len:
+      if s[last] == '\l': inc(last)
+      elif s[last] == '\c':
+        inc(last)
+        if last < s.len and s[last] == '\l': inc(last)
+
+    yield substr(s, first, if keepEol: last-1 else: eolpos-1)
+
+    # no EOL characters were consumed, so the end of the string was reached
+    if eolpos == last:
+      break
+
    first = last

-proc splitLines*(s: string): seq[string] {.noSideEffect,
+proc splitLines*(s: string, keepEol = false): seq[string] {.noSideEffect,
  rtl, extern: "nsuSplitLines".} =
  ## The same as the `splitLines <#splitLines.i,string>`_ iterator, but is a
  ## proc that returns a sequence of substrings.
-  accumulateResult(splitLines(s))
+  accumulateResult(splitLines(s, keepEol=keepEol))

 proc countLines*(s: string): int {.noSideEffect,
  rtl, extern: "nsuCountLines".} =
@@ -908,7 +917,7 @@ proc parseOctInt*(s: string): int {.noSideEffect,
  ## `s` are ignored.
  let L = parseutils.parseOct(s, result, 0)
  if L != s.len or L == 0:
-    raise newException(ValueError, "invalid oct integer: " & s)  
+    raise newException(ValueError, "invalid oct integer: " & s)

 proc parseHexInt*(s: string): int {.noSideEffect, procvar,
  rtl, extern: "nsuParseHexInt".} =
--- a/tests/stdlib/tstrutil.nim
+++ b/tests/stdlib/tstrutil.nim
@@ -199,6 +199,12 @@ proc testRFind =
  assert "0123456789ABCDEFGAH".rfind({'A'..'C'}, 13) == 12
  assert "0123456789ABCDEFGAH".rfind({'G'..'H'}, 13) == -1

+proc testSplitLines() =
+  let fixture = "a\nb\rc\r\nd"
+  assert len(fixture.splitLines) == 4
+  assert splitLines(fixture) == @["a", "b", "c", "d"]
+  assert splitLines(fixture, keepEol=true) == @["a\n", "b\r", "c\r\n", "d"]
+
 proc testCountLines =
  proc assertCountLines(s: string) = assert s.countLines == s.splitLines.len
  assertCountLines("")
@@ -229,7 +235,7 @@ proc testParseInts =
  assert "72".parseHexInt == 114
  assert "FF".parseHexInt == 255
  assert "ff".parseHexInt == 255
-  assert "fF".parseHexInt == 255  
+  assert "fF".parseHexInt == 255
  assert "0x7_2".parseHexInt == 114
  rejectParse "".parseHexInt
  rejectParse "_".parseHexInt
@@ -252,6 +258,7 @@ proc testParseInts =
 testDelete()
 testFind()
 testRFind()
+testSplitLines()
 testCountLines()
 testParseInts()