mirror of
https://github.com/nim-lang/Nim.git
synced 2026-05-05 13:34:46 +00:00
better implementation for wrapWords
This commit is contained in:
@@ -1,67 +1,88 @@
|
||||
import unicode
|
||||
#
|
||||
#
|
||||
# Nim's Runtime Library
|
||||
# (c) Copyright 2018 Nim contributors
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
proc wordWrap*(s: string, maxLineWidth = 80,
|
||||
import strutils, unicode
|
||||
|
||||
proc olen(s: string): int =
  ## Returns the number of units in `s`, where one unit is the run of
  ## bytes reported by `graphemeLen` at the current byte position.
  ## An empty string yields 0.
  var pos = 0
  while pos < s.len:
    # Skip over every byte that belongs to the unit starting at `pos`,
    # then count that unit once.
    pos += graphemeLen(s, pos)
    result += 1
|
||||
|
||||
proc wrapWords*(s: string, maxLineWidth = 80,
|
||||
splitLongWords = true,
|
||||
newLine = "\n"): string =
|
||||
## This function breaks all words that reach over `maxLineWidth`
|
||||
## measured in number of runes. When `splitLongWords` is `true`
|
||||
## words that are longer than `maxLineWidth` are split. Multiple
|
||||
## spaces and newlines are converted to a single space. All
|
||||
## whitespace is treated equally. Non-breaking whitespace is
|
||||
## ignored.
|
||||
|
||||
var currentWordLength: int = 0
|
||||
var currentWord: string = newStringOfCap(32)
|
||||
var currentLineLength: int = 0
|
||||
var currentWordLengthAtLineEnd: int = -1
|
||||
var longWordMode = false
|
||||
|
||||
template handleWhitespace(): untyped =
|
||||
if currentWord.len > 0:
|
||||
|
||||
if currentLineLength + 1 + currentWordLength > maxLineWidth:
|
||||
result.add newLine
|
||||
currentLineLength = 0
|
||||
|
||||
if currentLineLength > 0:
|
||||
result.add ' '
|
||||
currentLineLength += 1
|
||||
|
||||
result.add currentWord
|
||||
currentLineLength += currentWordLength
|
||||
|
||||
currentWord.setlen 0
|
||||
currentWordLength = 0
|
||||
|
||||
for rune in s.runes:
|
||||
if rune.isWhiteSpace:
|
||||
handleWhitespace()
|
||||
seps: set[char] = Whitespace,
|
||||
newLine = "\n"): string {.noSideEffect.} =
|
||||
## Word wraps `s`.
|
||||
result = newStringOfCap(s.len + s.len shr 6)
|
||||
var spaceLeft = maxLineWidth
|
||||
var lastSep = ""
|
||||
for word, isSep in tokenize(s, seps):
|
||||
let wlen = olen(word)
|
||||
if isSep:
|
||||
lastSep = word
|
||||
spaceLeft = spaceLeft - wlen
|
||||
elif wlen > spaceLeft:
|
||||
if splitLongWords and wlen > maxLineWidth:
|
||||
var i = 0
|
||||
while i < word.len:
|
||||
if spaceLeft <= 0:
|
||||
spaceLeft = maxLineWidth
|
||||
result.add newLine
|
||||
dec spaceLeft
|
||||
let L = graphemeLen(word, i)
|
||||
for j in 0 ..< L: result.add word[i+j]
|
||||
inc i, L
|
||||
else:
|
||||
spaceLeft = maxLineWidth - wlen
|
||||
result.add(newLine)
|
||||
result.add(word)
|
||||
else:
|
||||
if splitLongWords and currentWordLength >= maxLineWidth:
|
||||
handleWhitespace()
|
||||
|
||||
currentWord.add rune
|
||||
inc currentWordLength
|
||||
|
||||
handleWhitespace()
|
||||
|
||||
spaceLeft = spaceLeft - wlen
|
||||
result.add(lastSep)
|
||||
result.add(word)
|
||||
lastSep.setLen(0)
|
||||
|
||||
when isMainModule:
|
||||
import strutils
|
||||
|
||||
when true:
|
||||
let
|
||||
inp = """ this is a long text -- muchlongerthan10chars and here
|
||||
it goes"""
|
||||
outp = " this is a\nlong text\n--\nmuchlongerthan10chars\nand here\nit goes"
|
||||
doAssert wrapWords(inp, 10, false) == outp
|
||||
|
||||
proc checkLineLength(arg: string): void =
|
||||
for line in splitlines(arg):
|
||||
var numRunes = 0
|
||||
for rune in runes(line):
|
||||
numRunes += 1
|
||||
let
|
||||
longInp = """ThisIsOneVeryLongStringWhichWeWillSplitIntoEightSeparatePartsNow"""
|
||||
longOutp = "ThisIsOn\neVeryLon\ngStringW\nhichWeWi\nllSplitI\nntoEight\nSeparate\nPartsNow"
|
||||
doAssert wrapWords(longInp, 8, true) == longOutp
|
||||
|
||||
assert numRunes <= 80
|
||||
# test we don't break Umlauts into invalid bytes:
|
||||
let fies = "äöüöäöüöäöüöäöüööäöüöäößßßßüöäößßßßßß"
|
||||
let fiesRes = "ä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nä\nö\nü\nö\nö\nä\nö\nü\nö\nä\nö\nß\nß\nß\nß\nü\nö\nä\nö\nß\nß\nß\nß\nß\nß"
|
||||
doAssert wrapWords(fies, 1, true) == fiesRes
|
||||
|
||||
let longlongword = "abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"
|
||||
let longlongword = """abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüö
|
||||
äzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüüöäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiqfglwcßqfgxvlcwgtfhiaoen
|
||||
rsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocfqclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdrtnaetdr
|
||||
iaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
|
||||
ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
|
||||
let longlongwordRes = """
|
||||
abc uitdaeröägfßhydüäpydqfü,träpydqgpmüdträpydföägpydörztdüöäfguiaeowäzjdtrüöäp
|
||||
psnrtuiydrözenrüöäpyfdqazpesnrtulocjtüöäzydgyqgfqfgprtnwjlcydkqgfüöezmäzydydqüü
|
||||
öäpdtrnvwfhgckdumböäpydfgtdgfhtdrntdrntydfogiayqfguiatrnydrntüöärtniaoeydfgaoeiq
|
||||
fglwcßqfgxvlcwgtfhiaoenrsüöäapmböäptdrniaoydfglckqfhouenrtsüöäptrniaoeyqfgulocf
|
||||
qclgwxßqflgcwßqfxglcwrniatrnmüböäpmöäbpümöäbpüöämpbaoestnriaesnrtdiaesrtdniaesdr
|
||||
tnaetdriaoenvlcyfglwckßqfgvwkßqgfvlwkßqfgvlwckßqvlwkgfUIαοιαοιαχολωχσωχνωκψρχκψ
|
||||
ρτιεαοσηζϵηζιοεννκεωνιαλωσωκνκψρκγτφγτχκγτεκργτιχνκιωχσιλωσλωχξλξλξωχωχ
|
||||
ξχλωωχαοεοιαεοαεοιαεοαεοιαοεσναοεκνρκψγκψφϵιηαααοε"""
|
||||
doAssert wrapWords(longlongword) == longlongwordRes
|
||||
|
||||
checkLineLength(longlongword.wordWrap)
|
||||
|
||||
let tmp ="Наши исследования позволяют сделать вывод о том, что субъект выбирает xxxuiaetudtiraeüöätpghiacodöeronfdquiahgoüöädoiaqofhgiaeotrnuiaßqzfgiaoeurnudtitraenuitenruitarenitarenuitarentduiranetduiranetdruianetrnuiaertnuiatdenruiatdrne институциональный психоз. Важность этой функции подчеркивается тем фактом, что объект вызывает эгоцентризм. Самоактуализация аннигилирует генезис. Анима аннигилирует возрастной код. Закон просветляет аутотренинг. Наши исследования позволяют сделать вывод о том, что воспитание заметно осознаёт инсайт."
|
||||
|
||||
checkLineLength(tmp.wordWrap)
|
||||
|
||||
Reference in New Issue
Block a user