optimized wrapWords; fixes #14579 (#14606) [backport:1.2]

2026-02-12 14:23:45 +00:00 · 2020-06-08 14:34:26 +02:00
parent e3f80e434a
commit c308c2e60b
1 changed file with 48 additions and 25 deletions
--- a/lib/std/wordwrap.nim
+++ b/lib/std/wordwrap.nim
@@ -11,10 +11,10 @@

 import strutils, unicode

-proc olen(s: string): int =
-  var i = 0
+proc olen(s: string; start, lastExclusive: int): int =
+  var i = start
  result = 0
-  while i < s.len:
+  while i < lastExclusive:
    inc result
    let L = graphemeLen(s, i)
    inc i, L
@@ -32,31 +32,46 @@ proc wrapWords*(s: string, maxLineWidth = 80,
  result = newStringOfCap(s.len + s.len shr 6)
  var spaceLeft = maxLineWidth
  var lastSep = ""
-  for word, isSep in tokenize(s, seps):
-    let wlen = olen(word)
+
+  var i = 0
+  while true:
+    var j = i
+    let isSep = j < s.len and s[j] in seps
+    while j < s.len and (s[j] in seps) == isSep: inc(j)
+    if j <= i: break
+    #yield (substr(s, i, j-1), isSep)
    if isSep:
-      lastSep = word
-      spaceLeft = spaceLeft - wlen
-    elif wlen > spaceLeft:
-      if splitLongWords and wlen > maxLineWidth:
-        var i = 0
-        while i < word.len:
-          if spaceLeft <= 0:
-            spaceLeft = maxLineWidth
-            result.add newLine
-          dec spaceLeft
-          let L = graphemeLen(word, i)
-          for j in 0 ..< L: result.add word[i+j]
-          inc i, L
+      lastSep.setLen 0
+      for k in i..<j:
+        if s[k] notin {'\L', '\C'}: lastSep.add s[k]
+      if lastSep.len == 0:
+        lastSep.add ' '
+        dec spaceLeft
      else:
-        spaceLeft = maxLineWidth - wlen
-        result.add(newLine)
-        result.add(word)
+        spaceLeft = spaceLeft - olen(lastSep, 0, lastSep.len)
    else:
-      spaceLeft = spaceLeft - wlen
-      result.add(lastSep)
-      result.add(word)
-      lastSep.setLen(0)
+      let wlen = olen(s, i, j)
+      if wlen > spaceLeft:
+        if splitLongWords and wlen > maxLineWidth:
+          var k = 0
+          while k < j - i:
+            if spaceLeft <= 0:
+              spaceLeft = maxLineWidth
+              result.add newLine
+            dec spaceLeft
+            let L = graphemeLen(s, k+i)
+            for m in 0 ..< L: result.add s[i+k+m]
+            inc k, L
+        else:
+          spaceLeft = maxLineWidth - wlen
+          result.add(newLine)
+          for k in i..<j: result.add(s[k])
+      else:
+        spaceLeft = spaceLeft - wlen
+        result.add(lastSep)
+        for k in i..<j: result.add(s[k])
+        #lastSep.setLen(0)
+    i = j

 when isMainModule:

@@ -93,3 +108,11 @@ tnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολ
 ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
  doAssert wrapWords(longlongword) == longlongwordRes

+  # bug #14579
+  const input60 = """
+This is a long string. It is manually wrapped to 60
+characters. I would not expect it to be changed by
+wordwrap if wordwrap is set to wrap at 80 characters"""
+  const input60Res = """This is a long string. It is manually wrapped to 60 characters. I would not
+expect it to be changed by wordwrap if wordwrap is set to wrap at 80 characters"""
+  doAssert wrapWords(input60) == input60Res