better implementation for wrapWords

This commit is contained in:
Araq
2018-11-08 16:00:49 +01:00
committed by Andreas Rumpf
parent 4f787ac4f4
commit 56f76c5b08
2 changed files with 80 additions and 58 deletions

View File

@@ -1,67 +1,88 @@
import unicode
#
#
# Nim's Runtime Library
# (c) Copyright 2018 Nim contributors
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
import strutils, unicode

proc olen(s: string): int =
  ## Returns the length of `s` counted in `graphemeLen` steps instead of
  ## bytes, so a multi-byte UTF-8 character contributes 1 to the result.
  var i = 0
  result = 0
  while i < s.len:
    inc result
    # Skip every byte that belongs to the unit starting at byte index `i`.
    let L = graphemeLen(s, i)
    inc i, L

proc wrapWords*(s: string, maxLineWidth = 80,
                splitLongWords = true,
                seps: set[char] = Whitespace,
                newLine = "\n"): string {.noSideEffect.} =
  ## Word wraps `s`.
  ##
  ## `s` is cut by `tokenize` into alternating words and separator runs
  ## (characters contained in `seps`). Widths are measured with `olen`,
  ## i.e. not in bytes.
  ##
  ## * A word that fits into the space left on the current line is appended
  ##   together with the separator run that preceded it, unchanged.
  ## * A word that does not fit is started on a new line (`newLine` is
  ##   inserted); the separator run in front of it is not written.
  ## * A word wider than `maxLineWidth` is cut into pieces when
  ##   `splitLongWords` is true: it continues on the current line directly
  ##   after the text already there (the pending separator is not written)
  ##   and `newLine` is inserted each time the line is full. When
  ##   `splitLongWords` is false such a word is put on a line of its own
  ##   and that line exceeds `maxLineWidth`.
  ##
  ## Returns the wrapped text; `s` itself is not modified.
  result = newStringOfCap(s.len + s.len shr 6)
  var spaceLeft = maxLineWidth
  var lastSep = ""
  for word, isSep in tokenize(s, seps):
    let wlen = olen(word)
    if isSep:
      # Only remember the separator; it is written later, and only if the
      # following word stays on the same line.
      lastSep = word
      spaceLeft = spaceLeft - wlen
    elif wlen > spaceLeft:
      if splitLongWords and wlen > maxLineWidth:
        # Copy the word unit by unit, breaking whenever the line is full.
        var i = 0
        while i < word.len:
          if spaceLeft <= 0:
            spaceLeft = maxLineWidth
            result.add newLine
          dec spaceLeft
          # Append all bytes of one unit so UTF-8 sequences stay intact.
          let L = graphemeLen(word, i)
          for j in 0 ..< L: result.add word[i+j]
          inc i, L
      else:
        # May become negative for an over-long word; the next word is then
        # forced onto a new line as well.
        spaceLeft = maxLineWidth - wlen
        result.add(newLine)
        result.add(word)
    else:
      spaceLeft = spaceLeft - wlen
      result.add(lastSep)
      result.add(word)
      lastSep.setLen(0)
# Self-test section: only compiled when this file is the main module.
# NOTE(review): the lines below have lost their indentation, and the
# `wrapWords` assertions are interleaved with a `checkLineLength` helper
# whose body is split around them -- re-nest and untangle before compiling.
when isMainModule:
import strutils
when true:
let
inp = """ this is a long text -- muchlongerthan10chars and here
it goes"""
outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes"
# Splitting is switched off (third argument), so `outp` keeps the over-long
# word in one piece on a line of its own.
doAssert wrapWords(inp, 10, false) == outp
# Helper that counts the runes of every line of `arg`; the width check that
# uses the count is the `assert numRunes <= 80` line further down.
proc checkLineLength(arg: string): void =
for line in splitlines(arg):
var numRunes = 0
for rune in runes(line):
numRunes += 1
let
longInp = """ThisIsOneVeryLongStringWhichWeWillSplitIntoEightSeparatePartsNow"""
longOutp = "ThisIsOn\neVeryLon\ngStringW\nhichWeWi\nllSplitI\nntoEight\nSeparate\nPartsNow"
# With splitting enabled the single long word is cut into 8-character lines.
doAssert wrapWords(longInp, 8, true) == longOutp
# NOTE(review): reads `numRunes` from the loop in `checkLineLength` above;
# looks displaced from that loop body -- confirm where it belongs.
assert numRunes <= 80
# test we don't break Umlauts into invalid bytes:
let fies = "äöüöäöüöäöüöäöüööäöüöäößßßßüöäößßßßßß"
let fiesRes = "ä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nö\nä\nö\nü\nö\nä\nö\nß\nß\nß\nß\nü\nö\nä\nö\nß\nß\nß\nß\nß\nß"
# Width 1: the expected result holds one character per line.
doAssert wrapWords(fies, 1, true) == fiesRes
# NOTE(review): `longlongword` is declared twice below (single-line form and
# triple-quoted form); only one of the two declarations can stay.
let longlongword = "abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"
let longlongword = """abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüö
äzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoen
rsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdr
iaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
# Expected result of wrapping `longlongword` with default arguments.
let longlongwordRes = """
abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp
psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüü
öäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiq
fglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocf
qclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdr
tnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψ
ρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
doAssert wrapWords(longlongword) == longlongwordRes
# NOTE(review): the two `checkLineLength` calls below use `wordWrap`, while
# the assertions above call `wrapWords` -- confirm that `wordWrap` still
# exists before keeping them.
checkLineLength(longlongword.wordWrap)
let tmp ="Наши исследования позволяют сделать вывод о том, что субъект выбирает xxxuiaetudtiraeüöätpghiacodöeronfdquiahgoüöädoiaqofhgiaeotrnuiaßqzfgiaoeurnudtitraenuitenruitarenitarenuitarentduiranetduiranetdruianetrnuiaertnuiatdenruiatdrne институциональный психоз. Важность этой функции подчеркивается тем фактом, что объект вызывает эгоцентризм. Самоактуализация аннигилирует генезис. Анима аннигилирует возрастной код. Закон просветляет аутотренинг. Наши исследования позволяют сделать вывод о том, что воспитание заметно осознаёт инсайт."
checkLineLength(tmp.wordWrap)