mirror of
https://github.com/nim-lang/Nim.git
synced 2026-06-04 19:04:46 +00:00
Added openarray[char] overloads to std/unicode (#20648)
* Added openArray[char] overloads to std/unicode
  Call substr instead of index slice inside unicode.
  Added substr overload for openArray for parity with string functionality.
  Made the style checker happy and fixed overloads for substr.
* Added update to changelog [skip ci]
* Inline unicode string operations
* Moved substr overload to unicode

Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
This commit is contained in:
@@ -137,6 +137,7 @@
|
||||
- Added [`jsre.hasIndices`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/hasIndices)
|
||||
- Added `capacity` for `string` and `seq` to return the current capacity, see https://github.com/nim-lang/RFCs/issues/460
|
||||
- Added `openArray[char]` overloads for `std/parseutils` allowing more code reuse.
|
||||
- Added `openArray[char]` overloads for `std/unicode` allowing more code reuse.
|
||||
- Added `safe` parameter to `base64.encodeMime`.
|
||||
|
||||
[//]: # "Deprecations:"
|
||||
|
||||
@@ -21,6 +21,16 @@
|
||||
## * `encodings module <encodings.html>`_
|
||||
|
||||
include "system/inclrtl"
|
||||
import std/strbasics
|
||||
# Private helper: exposes the full byte range `0 .. s.high` of the string `s`
# through `toOpenArray`, so string wrappers can forward to the
# `openArray[char]` overloads.
template toOa(s: string): auto = s.toOpenArray(0, s.high)
|
||||
|
||||
proc substr(s: openArray[char], first, last: int): string =
  ## Copies the characters ``s[first .. last]`` into a freshly allocated
  ## string.
  ##
  ## Out-of-range bounds are clamped rather than rejected: ``first`` is raised
  ## to ``0`` and ``last`` is lowered to ``high(s)``. An empty string is
  ## returned when the clamped range holds no characters.
  # Mirrors the clamping behaviour of `substr` for strings in `system`.
  let
    lo = max(first, 0)
    hi = min(last, high(s))
    count = max(hi - lo + 1, 0)
  result = newString(count)
  for offset in 0 ..< count:
    result[offset] = s[lo + offset]
|
||||
|
||||
type
|
||||
RuneImpl = int32 # underlying type of Rune
|
||||
@@ -32,7 +42,7 @@ type
|
||||
|
||||
template ones(n: untyped): untyped = ((1 shl n)-1)
|
||||
|
||||
proc runeLen*(s: string): int {.rtl, extern: "nuc$1".} =
|
||||
proc runeLen*(s: openArray[char]): int {.rtl, extern: "nuc$1".} =
|
||||
## Returns the number of runes of the string ``s``.
|
||||
runnableExamples:
|
||||
let a = "añyóng"
|
||||
@@ -51,7 +61,7 @@ proc runeLen*(s: string): int {.rtl, extern: "nuc$1".} =
|
||||
else: inc i
|
||||
inc(result)
|
||||
|
||||
proc runeLenAt*(s: string, i: Natural): int =
|
||||
proc runeLenAt*(s: openArray[char], i: Natural): int =
|
||||
## Returns the number of bytes the rune starting at ``s[i]`` takes.
|
||||
##
|
||||
## See also:
|
||||
@@ -71,7 +81,7 @@ proc runeLenAt*(s: string, i: Natural): int =
|
||||
|
||||
const replRune = Rune(0xFFFD)
|
||||
|
||||
template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
|
||||
template fastRuneAt*(s: openArray[char] or string, i: int, result: untyped, doInc = true) =
|
||||
## Returns the rune ``s[i]`` in ``result``.
|
||||
##
|
||||
## If ``doInc == true`` (default), ``i`` is incremented by the number
|
||||
@@ -149,7 +159,7 @@ template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
|
||||
result = Rune(uint(s[i]))
|
||||
when doInc: inc(i)
|
||||
|
||||
proc runeAt*(s: string, i: Natural): Rune =
|
||||
proc runeAt*(s: openArray[char], i: Natural): Rune =
|
||||
## Returns the rune in ``s`` at **byte index** ``i``.
|
||||
##
|
||||
## See also:
|
||||
@@ -163,7 +173,7 @@ proc runeAt*(s: string, i: Natural): Rune =
|
||||
doAssert a.runeAt(3) == "y".runeAt(0)
|
||||
fastRuneAt(s, i, result, false)
|
||||
|
||||
proc validateUtf8*(s: string): int =
|
||||
proc validateUtf8*(s: openArray[char]): int =
|
||||
## Returns the position of the invalid byte in ``s`` if the string ``s`` does
|
||||
## not hold valid UTF-8 data. Otherwise ``-1`` is returned.
|
||||
##
|
||||
@@ -300,7 +310,7 @@ proc `$`*(runes: seq[Rune]): string =
|
||||
for rune in runes:
|
||||
result.add rune
|
||||
|
||||
proc runeOffset*(s: string, pos: Natural, start: Natural = 0): int =
|
||||
proc runeOffset*(s: openArray[char], pos: Natural, start: Natural = 0): int =
|
||||
## Returns the byte position of rune
|
||||
## at position ``pos`` in ``s`` with an optional start byte position.
|
||||
## Returns the special value -1 if it runs out of the string.
|
||||
@@ -327,7 +337,7 @@ proc runeOffset*(s: string, pos: Natural, start: Natural = 0): int =
|
||||
inc i
|
||||
return o
|
||||
|
||||
proc runeReverseOffset*(s: string, rev: Positive): (int, int) =
|
||||
proc runeReverseOffset*(s: openArray[char], rev: Positive): (int, int) =
|
||||
## Returns a tuple with the byte offset of the
|
||||
## rune at position ``rev`` in ``s``, counting
|
||||
## from the end (starting with 1) and the total
|
||||
@@ -355,7 +365,7 @@ proc runeReverseOffset*(s: string, rev: Positive): (int, int) =
|
||||
dec a
|
||||
result = if a > 0: (-a, rev.int-a) else: (x, -a+rev.int)
|
||||
|
||||
proc runeAtPos*(s: string, pos: int): Rune =
|
||||
proc runeAtPos*(s: openArray[char], pos: int): Rune =
|
||||
## Returns the rune at position ``pos``.
|
||||
##
|
||||
## **Beware:** This can lead to unoptimized code and slow execution!
|
||||
@@ -368,7 +378,7 @@ proc runeAtPos*(s: string, pos: int): Rune =
|
||||
## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
|
||||
fastRuneAt(s, runeOffset(s, pos), result, false)
|
||||
|
||||
proc runeStrAtPos*(s: string, pos: Natural): string =
|
||||
proc runeStrAtPos*(s: openArray[char], pos: Natural): string =
|
||||
## Returns the rune at position ``pos`` as UTF8 String.
|
||||
##
|
||||
## **Beware:** This can lead to unoptimized code and slow execution!
|
||||
@@ -380,9 +390,9 @@ proc runeStrAtPos*(s: string, pos: Natural): string =
|
||||
## * `runeAtPos proc <#runeAtPos,string,int>`_
|
||||
## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
|
||||
let o = runeOffset(s, pos)
|
||||
s[o .. (o+runeLenAt(s, o)-1)]
|
||||
substr(s.toOpenArray(o, (o+runeLenAt(s, o)-1)))
|
||||
|
||||
proc runeSubStr*(s: string, pos: int, len: int = int.high): string =
|
||||
proc runeSubStr*(s: openArray[char], pos: int, len: int = int.high): string =
|
||||
## Returns the UTF-8 substring starting at code point ``pos``
|
||||
## with ``len`` code points.
|
||||
##
|
||||
@@ -401,7 +411,7 @@ proc runeSubStr*(s: string, pos: int, len: int = int.high): string =
|
||||
if pos < 0:
|
||||
let (o, rl) = runeReverseOffset(s, -pos)
|
||||
if len >= rl:
|
||||
result = s.substr(o, s.len-1)
|
||||
result = s.substr(o, s.high)
|
||||
elif len < 0:
|
||||
let e = rl + len
|
||||
if e < 0:
|
||||
@@ -626,7 +636,7 @@ template runeCheck(s, runeProc) =
|
||||
fastRuneAt(s, i, rune, doInc = true)
|
||||
result = runeProc(rune) and result
|
||||
|
||||
proc isAlpha*(s: string): bool {.noSideEffect,
|
||||
proc isAlpha*(s: openArray[char]): bool {.noSideEffect,
|
||||
rtl, extern: "nuc$1Str".} =
|
||||
## Returns true if ``s`` contains all alphabetic runes.
|
||||
runnableExamples:
|
||||
@@ -634,7 +644,7 @@ proc isAlpha*(s: string): bool {.noSideEffect,
|
||||
doAssert a.isAlpha
|
||||
runeCheck(s, isAlpha)
|
||||
|
||||
proc isSpace*(s: string): bool {.noSideEffect,
|
||||
proc isSpace*(s: openArray[char]): bool {.noSideEffect,
|
||||
rtl, extern: "nuc$1Str".} =
|
||||
## Returns true if ``s`` contains all whitespace runes.
|
||||
runnableExamples:
|
||||
@@ -655,21 +665,21 @@ template convertRune(s, runeProc) =
|
||||
rune = runeProc(rune)
|
||||
fastToUTF8Copy(rune, result, resultIndex, doInc = true)
|
||||
|
||||
proc toUpper*(s: string): string {.noSideEffect,
|
||||
proc toUpper*(s: openArray[char]): string {.noSideEffect,
|
||||
rtl, extern: "nuc$1Str".} =
|
||||
## Converts ``s`` into upper-case runes.
|
||||
runnableExamples:
|
||||
doAssert toUpper("abγ") == "ABΓ"
|
||||
convertRune(s, toUpper)
|
||||
|
||||
proc toLower*(s: string): string {.noSideEffect,
|
||||
proc toLower*(s: openArray[char]): string {.noSideEffect,
|
||||
rtl, extern: "nuc$1Str".} =
|
||||
## Converts ``s`` into lower-case runes.
|
||||
runnableExamples:
|
||||
doAssert toLower("ABΓ") == "abγ"
|
||||
convertRune(s, toLower)
|
||||
|
||||
proc swapCase*(s: string): string {.noSideEffect,
|
||||
proc swapCase*(s: openArray[char]): string {.noSideEffect,
|
||||
rtl, extern: "nuc$1".} =
|
||||
## Swaps the case of runes in ``s``.
|
||||
##
|
||||
@@ -691,7 +701,7 @@ proc swapCase*(s: string): string {.noSideEffect,
|
||||
rune = rune.toUpper()
|
||||
fastToUTF8Copy(rune, result, resultIndex, doInc = true)
|
||||
|
||||
proc capitalize*(s: string): string {.noSideEffect,
|
||||
proc capitalize*(s: openArray[char]): string {.noSideEffect,
|
||||
rtl, extern: "nuc$1".} =
|
||||
## Converts the first character of ``s`` into an upper-case rune.
|
||||
runnableExamples:
|
||||
@@ -703,12 +713,12 @@ proc capitalize*(s: string): string {.noSideEffect,
|
||||
rune: Rune
|
||||
i = 0
|
||||
fastRuneAt(s, i, rune, doInc = true)
|
||||
result = $toUpper(rune) & substr(s, i)
|
||||
result = $toUpper(rune) & substr(s.toOpenArray(i, s.high))
|
||||
|
||||
when not defined(nimHasEffectsOf):
|
||||
{.pragma: effectsOf.}
|
||||
|
||||
proc translate*(s: string, replacements: proc(key: string): string): string {.
|
||||
proc translate*(s: openArray[char], replacements: proc(key: string): string): string {.
|
||||
rtl, extern: "nuc$1", effectsOf: replacements.} =
|
||||
## Translates words in a string using the ``replacements`` proc to substitute
|
||||
## words inside ``s`` with their replacements.
|
||||
@@ -743,7 +753,7 @@ proc translate*(s: string, replacements: proc(key: string): string): string {.
|
||||
|
||||
if whiteSpace and inWord:
|
||||
# If we've reached the end of a word
|
||||
let word = s[wordStart ..< lastIndex]
|
||||
let word = substr(s.toOpenArray(wordStart, lastIndex - 1))
|
||||
result.add(replacements(word))
|
||||
result.add($rune)
|
||||
inWord = false
|
||||
@@ -758,10 +768,10 @@ proc translate*(s: string, replacements: proc(key: string): string): string {.
|
||||
|
||||
if wordStart < len(s) and inWord:
|
||||
# Get the trailing word at the end
|
||||
let word = s[wordStart .. ^1]
|
||||
let word = substr(s.toOpenArray(wordStart, s.high))
|
||||
result.add(replacements(word))
|
||||
|
||||
proc title*(s: string): string {.noSideEffect,
|
||||
proc title*(s: openArray[char]): string {.noSideEffect,
|
||||
rtl, extern: "nuc$1".} =
|
||||
## Converts ``s`` to a unicode title.
|
||||
##
|
||||
@@ -787,7 +797,7 @@ proc title*(s: string): string {.noSideEffect,
|
||||
fastToUTF8Copy(rune, result, resultIndex, doInc = true)
|
||||
|
||||
|
||||
iterator runes*(s: string): Rune =
|
||||
iterator runes*(s: openArray[char]): Rune =
|
||||
## Iterates over any rune of the string ``s`` returning runes.
|
||||
var
|
||||
i = 0
|
||||
@@ -796,7 +806,7 @@ iterator runes*(s: string): Rune =
|
||||
fastRuneAt(s, i, result, true)
|
||||
yield result
|
||||
|
||||
iterator utf8*(s: string): string =
|
||||
iterator utf8*(s: openArray[char]): string =
|
||||
## Iterates over any rune of the string ``s`` returning utf8 values.
|
||||
##
|
||||
## See also:
|
||||
@@ -807,10 +817,10 @@ iterator utf8*(s: string): string =
|
||||
var o = 0
|
||||
while o < s.len:
|
||||
let n = runeLenAt(s, o)
|
||||
yield s[o .. (o+n-1)]
|
||||
yield substr(s.toOpenArray(o, (o+n-1)))
|
||||
o += n
|
||||
|
||||
proc toRunes*(s: string): seq[Rune] =
|
||||
proc toRunes*(s: openArray[char]): seq[Rune] =
|
||||
## Obtains a sequence containing the Runes in ``s``.
|
||||
##
|
||||
## See also:
|
||||
@@ -823,7 +833,7 @@ proc toRunes*(s: string): seq[Rune] =
|
||||
for r in s.runes:
|
||||
result.add(r)
|
||||
|
||||
proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1".} =
|
||||
proc cmpRunesIgnoreCase*(a, b: openArray[char]): int {.rtl, extern: "nuc$1".} =
|
||||
## Compares two UTF-8 strings and ignores the case. Returns:
|
||||
##
|
||||
## | 0 if a == b
|
||||
@@ -840,7 +850,7 @@ proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1".} =
|
||||
if result != 0: return
|
||||
result = a.len - b.len
|
||||
|
||||
proc reversed*(s: string): string =
|
||||
proc reversed*(s: openArray[char]): string =
|
||||
## Returns the reverse of ``s``, interpreting it as runes.
|
||||
##
|
||||
## Unicode combining characters are correctly interpreted as well.
|
||||
@@ -875,7 +885,7 @@ proc reversed*(s: string): string =
|
||||
|
||||
reverseUntil(len(s))
|
||||
|
||||
proc graphemeLen*(s: string; i: Natural): Natural =
|
||||
proc graphemeLen*(s: openArray[char]; i: Natural): Natural =
|
||||
## The number of bytes belonging to byte index ``s[i]``,
|
||||
## including following combining code unit.
|
||||
runnableExamples:
|
||||
@@ -894,7 +904,7 @@ proc graphemeLen*(s: string; i: Natural): Natural =
|
||||
if not isCombining(r2): break
|
||||
result = j-i
|
||||
|
||||
proc lastRune*(s: string; last: int): (Rune, int) =
|
||||
proc lastRune*(s: openArray[char]; last: int): (Rune, int) =
|
||||
## Length of the last rune in ``s[0..last]``. Returns the rune and its length
|
||||
## in bytes.
|
||||
if s[last] <= chr(127):
|
||||
@@ -923,12 +933,12 @@ proc size*(r: Rune): int {.noSideEffect.} =
|
||||
else: result = 1
|
||||
|
||||
# --------- Private templates for different split separators -----------
|
||||
proc stringHasSep(s: string, index: int, seps: openArray[Rune]): bool =
|
||||
proc stringHasSep(s: openArray[char], index: int, seps: openArray[Rune]): bool =
|
||||
var rune: Rune
|
||||
fastRuneAt(s, index, rune, false)
|
||||
return seps.contains(rune)
|
||||
|
||||
proc stringHasSep(s: string, index: int, sep: Rune): bool =
|
||||
proc stringHasSep(s: openArray[char], index: int, sep: Rune): bool =
|
||||
var rune: Rune
|
||||
fastRuneAt(s, index, rune, false)
|
||||
return sep == rune
|
||||
@@ -946,12 +956,12 @@ template splitCommon(s, sep, maxsplit: untyped) =
|
||||
while last < sLen and not stringHasSep(s, last, sep):
|
||||
inc(last, runeLenAt(s, last))
|
||||
if splits == 0: last = sLen
|
||||
yield s[first .. (last - 1)]
|
||||
yield substr(s.toOpenArray(first, (last - 1)))
|
||||
if splits == 0: break
|
||||
dec(splits)
|
||||
inc(last, if last < sLen: runeLenAt(s, last) else: 1)
|
||||
|
||||
iterator split*(s: string, seps: openArray[Rune] = unicodeSpaces,
|
||||
iterator split*(s: openArray[char], seps: openArray[Rune] = unicodeSpaces,
|
||||
maxsplit: int = -1): string =
|
||||
## Splits the unicode string ``s`` into substrings using a group of separators.
|
||||
##
|
||||
@@ -977,7 +987,7 @@ iterator split*(s: string, seps: openArray[Rune] = unicodeSpaces,
|
||||
|
||||
splitCommon(s, seps, maxsplit)
|
||||
|
||||
iterator splitWhitespace*(s: string): string =
|
||||
iterator splitWhitespace*(s: openArray[char]): string =
|
||||
## Splits a unicode string at whitespace runes.
|
||||
splitCommon(s, unicodeSpaces, -1)
|
||||
|
||||
@@ -985,13 +995,13 @@ template accResult(iter: untyped) =
|
||||
result = @[]
|
||||
for x in iter: add(result, x)
|
||||
|
||||
proc splitWhitespace*(s: string): seq[string] {.noSideEffect,
|
||||
proc splitWhitespace*(s: openArray[char]): seq[string] {.noSideEffect,
|
||||
rtl, extern: "ncuSplitWhitespace".} =
|
||||
## The same as the `splitWhitespace <#splitWhitespace.i,string>`_
|
||||
## iterator, but is a proc that returns a sequence of substrings.
|
||||
accResult(splitWhitespace(s))
|
||||
|
||||
iterator split*(s: string, sep: Rune, maxsplit: int = -1): string =
|
||||
iterator split*(s: openArray[char], sep: Rune, maxsplit: int = -1): string =
|
||||
## Splits the unicode string ``s`` into substrings using a single separator.
|
||||
## Substrings are separated by the rune ``sep``.
|
||||
runnableExamples:
|
||||
@@ -1002,19 +1012,19 @@ iterator split*(s: string, sep: Rune, maxsplit: int = -1): string =
|
||||
|
||||
splitCommon(s, sep, maxsplit)
|
||||
|
||||
proc split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1):
|
||||
proc split*(s: openArray[char], seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1):
|
||||
seq[string] {.noSideEffect, rtl, extern: "nucSplitRunes".} =
|
||||
## The same as the `split iterator <#split.i,string,openArray[Rune],int>`_,
|
||||
## but is a proc that returns a sequence of substrings.
|
||||
accResult(split(s, seps, maxsplit))
|
||||
|
||||
proc split*(s: string, sep: Rune, maxsplit: int = -1): seq[string] {.noSideEffect,
|
||||
proc split*(s: openArray[char], sep: Rune, maxsplit: int = -1): seq[string] {.noSideEffect,
|
||||
rtl, extern: "nucSplitRune".} =
|
||||
## The same as the `split iterator <#split.i,string,Rune,int>`_, but is a proc
|
||||
## that returns a sequence of substrings.
|
||||
accResult(split(s, sep, maxsplit))
|
||||
|
||||
proc strip*(s: string, leading = true, trailing = true,
|
||||
proc strip*(s: openArray[char], leading = true, trailing = true,
|
||||
runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect,
|
||||
rtl, extern: "nucStrip".} =
|
||||
## Strips leading or trailing ``runes`` from ``s`` and returns
|
||||
@@ -1069,7 +1079,7 @@ proc strip*(s: string, leading = true, trailing = true,
|
||||
let newLen = eI - sI + 1
|
||||
result = newStringOfCap(newLen)
|
||||
if newLen > 0:
|
||||
result.add s[sI .. eI]
|
||||
result.add substr(s.toOpenArray(sI, eI))
|
||||
|
||||
proc repeat*(c: Rune, count: Natural): string {.noSideEffect,
|
||||
rtl, extern: "nucRepeatRune".} =
|
||||
@@ -1085,7 +1095,7 @@ proc repeat*(c: Rune, count: Natural): string {.noSideEffect,
|
||||
for i in 0 ..< count:
|
||||
result.add s
|
||||
|
||||
proc align*(s: string, count: Natural, padding = ' '.Rune): string {.
|
||||
proc align*(s: openArray[char], count: Natural, padding = ' '.Rune): string {.
|
||||
noSideEffect, rtl, extern: "nucAlignString".} =
|
||||
## Aligns a unicode string ``s`` with ``padding``, so that it has a rune-length
|
||||
## of ``count``.
|
||||
@@ -1110,9 +1120,9 @@ proc align*(s: string, count: Natural, padding = ' '.Rune): string {.
|
||||
for i in 0 ..< spaces: result.add padStr
|
||||
result.add s
|
||||
else:
|
||||
result = s
|
||||
result = s.substr
|
||||
|
||||
proc alignLeft*(s: string, count: Natural, padding = ' '.Rune): string {.
|
||||
proc alignLeft*(s: openArray[char], count: Natural, padding = ' '.Rune): string {.
|
||||
noSideEffect.} =
|
||||
## Left-aligns a unicode string ``s`` with ``padding``, so that it has a
|
||||
## rune-length of ``count``.
|
||||
@@ -1136,4 +1146,365 @@ proc alignLeft*(s: string, count: Natural, padding = ' '.Rune): string {.
|
||||
for i in sLen ..< count:
|
||||
result.add padStr
|
||||
else:
|
||||
result = s
|
||||
result = s.substr
|
||||
|
||||
|
||||
proc runeLen*(s: string): int {.inline.} =
  ## Counts the runes (not the bytes) stored in the string ``s``.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  runnableExamples:
    let a = "añyóng"
    doAssert a.runeLen == 6
    ## note: a.len == 8
  result = runeLen(s.toOa())
|
||||
|
||||
proc runeLenAt*(s: string, i: Natural): int {.inline.} =
  ## Returns how many bytes the rune beginning at ``s[i]`` occupies.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  ##
  ## See also:
  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  runnableExamples:
    let a = "añyóng"
    doAssert a.runeLenAt(0) == 1
    doAssert a.runeLenAt(1) == 2
  result = runeLenAt(s.toOa(), i)
|
||||
|
||||
proc runeAt*(s: string, i: Natural): Rune {.inline.} =
  ## Decodes and returns the rune of ``s`` located at **byte index** ``i``.
  ##
  ## See also:
  ## * `runeAtPos proc <#runeAtPos,string,int>`_
  ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_
  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  runnableExamples:
    let a = "añyóng"
    doAssert a.runeAt(1) == "ñ".runeAt(0)
    doAssert a.runeAt(2) == "ñ".runeAt(1)
    doAssert a.runeAt(3) == "y".runeAt(0)
  # Decode straight into `result`; the index is not advanced.
  fastRuneAt(s, i, result, doInc = false)
|
||||
|
||||
proc validateUtf8*(s: string): int {.inline.} =
  ## Checks whether ``s`` holds valid UTF-8 data. Returns the position of the
  ## invalid byte if it does not, and ``-1`` otherwise.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  ##
  ## See also:
  ## * `toUTF8 proc <#toUTF8,Rune>`_
  ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
  ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
  result = validateUtf8(s.toOa())
|
||||
|
||||
proc runeOffset*(s: string, pos: Natural, start: Natural = 0): int {.inline.} =
  ## Returns the byte position of the rune at rune position ``pos`` in ``s``,
  ## optionally counting from the byte position ``start``.
  ## The special value -1 is returned if it runs out of the string.
  ##
  ## **Beware:** This can lead to unoptimized code and slow execution!
  ## Most problems can be solved more efficiently by using an iterator
  ## or conversion to a seq of Rune.
  ##
  ## See also:
  ## * `runeReverseOffset proc <#runeReverseOffset,string,Positive>`_
  runnableExamples:
    let a = "añyóng"
    doAssert a.runeOffset(1) == 1
    doAssert a.runeOffset(3) == 4
    doAssert a.runeOffset(4) == 6
  result = runeOffset(s.toOa(), pos, start)
|
||||
|
||||
proc runeReverseOffset*(s: string, rev: Positive): (int, int) {.inline.} =
  ## Returns a tuple holding the byte offset of the rune at position ``rev``
  ## in ``s``, counted from the end (starting with 1), and the total number
  ## of runes in the string.
  ##
  ## The offset is negative if the string has too few runes to satisfy the
  ## request.
  ##
  ## **Beware:** This can lead to unoptimized code and slow execution!
  ## Most problems can be solved more efficiently by using an iterator
  ## or conversion to a seq of Rune.
  ##
  ## See also:
  ## * `runeOffset proc <#runeOffset,string,Natural,Natural>`_
  result = runeReverseOffset(s.toOa(), rev)
|
||||
|
||||
proc runeAtPos*(s: string, pos: int): Rune {.inline.} =
  ## Returns the rune at rune position ``pos`` of ``s``.
  ##
  ## **Beware:** This can lead to unoptimized code and slow execution!
  ## Most problems can be solved more efficiently by using an iterator
  ## or conversion to a seq of Rune.
  ##
  ## See also:
  ## * `runeAt proc <#runeAt,string,Natural>`_
  ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_
  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  # `fastRuneAt` is a template, so an argument expression is substituted at
  # every use of its index parameter. Resolve the byte offset once here so
  # the scan done by `runeOffset` is not repeated for each of those uses.
  let offset = runeOffset(s, pos)
  fastRuneAt(toOa(s), offset, result, false)
|
||||
|
||||
proc runeStrAtPos*(s: string, pos: Natural): string {.inline.} =
  ## Returns the rune at rune position ``pos`` of ``s`` as a UTF-8 string.
  ##
  ## **Beware:** This can lead to unoptimized code and slow execution!
  ## Most problems can be solved more efficiently by using an iterator
  ## or conversion to a seq of Rune.
  ##
  ## See also:
  ## * `runeAt proc <#runeAt,string,Natural>`_
  ## * `runeAtPos proc <#runeAtPos,string,int>`_
  ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_
  let
    firstByte = runeOffset(s, pos)
    lastByte = firstByte + runeLenAt(s, firstByte) - 1
  result = substr(s.toOpenArray(firstByte, lastByte))
|
||||
|
||||
proc runeSubStr*(s: string, pos: int, len: int = int.high): string {.inline.} =
  ## Returns the UTF-8 substring starting at code point ``pos``
  ## with ``len`` code points.
  ##
  ## If ``pos`` or ``len`` is negative they count from
  ## the end of the string. If ``len`` is not given it means the longest
  ## possible string.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  runnableExamples:
    # Two spaces follow "Hänsel", which puts ':' at code point 10.
    let s = "Hänsel  ««: 10,00€"
    doAssert(runeSubStr(s, 0, 2) == "Hä")
    doAssert(runeSubStr(s, 10, 1) == ":")
    doAssert(runeSubStr(s, -6) == "10,00€")
    doAssert(runeSubStr(s, 10) == ": 10,00€")
    doAssert(runeSubStr(s, 12, 5) == "10,00")
    doAssert(runeSubStr(s, -6, 3) == "10,")
  runeSubStr(toOa(s), pos, len)
|
||||
|
||||
|
||||
proc isAlpha*(s: string): bool {.noSideEffect, inline.} =
  ## Returns true if ``s`` contains all alphabetic runes.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  runnableExamples:
    let a = "añyóng"
    doAssert a.isAlpha
  result = isAlpha(s.toOa())
|
||||
|
||||
proc isSpace*(s: string): bool {.noSideEffect, inline.} =
  ## Returns true if ``s`` contains all whitespace runes.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  runnableExamples:
    let a = "\t\l \v\r\f"
    doAssert a.isSpace
  result = isSpace(s.toOa())
|
||||
|
||||
|
||||
proc toUpper*(s: string): string {.noSideEffect, inline.} =
  ## Returns ``s`` with its runes converted to upper case.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  runnableExamples:
    doAssert toUpper("abγ") == "ABΓ"
  result = toUpper(s.toOa())
|
||||
|
||||
proc toLower*(s: string): string {.noSideEffect, inline.} =
  ## Returns ``s`` with its runes converted to lower case.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  runnableExamples:
    doAssert toLower("ABΓ") == "abγ"
  result = toLower(s.toOa())
|
||||
|
||||
proc swapCase*(s: string): string {.noSideEffect, inline.} =
  ## Swaps the case of runes in ``s``.
  ##
  ## The result is a new string in which the case of every rune
  ## is swapped if possible.
  runnableExamples:
    doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA"
  result = swapCase(s.toOa())
|
||||
|
||||
proc capitalize*(s: string): string {.noSideEffect.} =
  ## Returns ``s`` with its first character converted to an upper-case rune.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  runnableExamples:
    doAssert capitalize("βeta") == "Βeta"
  result = capitalize(s.toOa())
|
||||
|
||||
|
||||
proc translate*(s: string, replacements: proc(key: string): string): string {.
    effectsOf: replacements, inline.} =
  ## Translates words in a string using the ``replacements`` proc to substitute
  ## words inside ``s`` with their replacements.
  ##
  ## ``replacements`` is any proc that takes a word and returns
  ## a new word to fill its place.
  runnableExamples:
    proc wordToNumber(s: string): string =
      case s
      of "one": "1"
      of "two": "2"
      else: s
    let a = "one two three four"
    doAssert a.translate(wordToNumber) == "1 2 three four"
  result = translate(s.toOa(), replacements)
|
||||
|
||||
proc title*(s: string): string {.noSideEffect, inline.} =
  ## Converts ``s`` to a unicode title.
  ##
  ## The result is a new string in which the first character
  ## of each word inside ``s`` is capitalized.
  runnableExamples:
    doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma"
  result = title(s.toOa())
|
||||
|
||||
|
||||
iterator runes*(s: string): Rune =
  ## Yields each rune of the string ``s`` in turn.
  ##
  ## Re-yields everything produced by the ``openArray[char]`` overload over
  ## the whole of ``s``.
  for r in runes(s.toOa()):
    yield r
|
||||
|
||||
iterator utf8*(s: string): string =
  ## Yields each rune of the string ``s`` in turn as a UTF-8 string.
  ##
  ## Re-yields everything produced by the ``openArray[char]`` overload over
  ## the whole of ``s``.
  ##
  ## See also:
  ## * `validateUtf8 proc <#validateUtf8,string>`_
  ## * `toUTF8 proc <#toUTF8,Rune>`_
  ## * `$ proc <#$,Rune>`_ alias for `toUTF8`
  ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_
  for encoded in utf8(s.toOa()):
    yield encoded
|
||||
|
||||
proc toRunes*(s: string): seq[Rune] {.inline.} =
  ## Returns a sequence holding the Runes of ``s``.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  ##
  ## See also:
  ## * `$ proc <#$,Rune>`_ for a reverse operation
  runnableExamples:
    let a = toRunes("aáä")
    doAssert a == @["a".runeAt(0), "á".runeAt(0), "ä".runeAt(0)]
  result = toRunes(s.toOa())
|
||||
|
||||
proc cmpRunesIgnoreCase*(a, b: string): int {.inline.} =
  ## Compares two UTF-8 strings and ignores the case. Returns:
  ##
  ## | 0 if a == b
  ## | < 0 if a < b
  ## | > 0 if a > b
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of both
  ## strings.
  result = cmpRunesIgnoreCase(toOa(a), toOa(b))
|
||||
|
||||
proc reversed*(s: string): string {.inline.} =
  ## Returns ``s`` reversed, treating it as a sequence of runes.
  ##
  ## Unicode combining characters are correctly interpreted as well.
  runnableExamples:
    assert reversed("Reverse this!") == "!siht esreveR"
    assert reversed("先秦兩漢") == "漢兩秦先"
    assert reversed("as⃝df̅") == "f̅ds⃝a"
    assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
  result = reversed(s.toOa())
|
||||
|
||||
proc graphemeLen*(s: string; i: Natural): Natural {.inline.} =
  ## Returns the number of bytes belonging to byte index ``s[i]``,
  ## including following combining code unit.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  runnableExamples:
    let a = "añyóng"
    doAssert a.graphemeLen(1) == 2 ## ñ
    doAssert a.graphemeLen(2) == 1
    doAssert a.graphemeLen(4) == 2 ## ó
  result = graphemeLen(s.toOa(), i)
|
||||
|
||||
proc lastRune*(s: string; last: int): (Rune, int) {.inline.} =
  ## Returns the last rune of ``s[0..last]`` together with its length
  ## in bytes.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  result = lastRune(s.toOa(), last)
|
||||
|
||||
iterator split*(s: string, seps: openArray[Rune] = unicodeSpaces,
    maxsplit: int = -1): string =
  ## Splits the unicode string ``s`` into substrings using a group of
  ## separators.
  ##
  ## Substrings are separated by a substring containing only ``seps``.
  ## ``maxsplit`` is passed on unchanged to the shared splitting template.
  runnableExamples:
    import std/sequtils

    assert toSeq("hÃllo\lthis\lis an\texample\l是".split) ==
      @["hÃllo", "this", "is", "an", "example", "是"]

    # And the following code splits the same string using a sequence of Runes.
    assert toSeq(split("añyóng:hÃllo;是$example", ";:$".toRunes)) ==
      @["añyóng", "hÃllo", "是", "example"]

    # example with a `Rune` separator and unused one `;`:
    assert toSeq(split("ab是de:f:", ";:是".toRunes)) == @["ab", "de", "f", ""]

    # Another example that splits a string containing a date.
    let date = "2012-11-20T22:08:08.398990"

    assert toSeq(split(date, " -:T".toRunes)) ==
      @["2012", "11", "20", "22", "08", "08.398990"]

  splitCommon(s.toOa(), seps, maxsplit)
|
||||
|
||||
iterator splitWhitespace*(s: string): string =
  ## Yields the substrings of the unicode string ``s`` obtained by splitting
  ## it at whitespace runes.
  # Same splitting template as `split`, fixed to `unicodeSpaces` and with no
  # split limit (-1).
  splitCommon(toOa(s), unicodeSpaces, -1)
|
||||
|
||||
|
||||
proc splitWhitespace*(s: string): seq[string] {.noSideEffect, inline.} =
  ## The same as the `splitWhitespace <#splitWhitespace.i,string>`_
  ## iterator, but is a proc that returns a sequence of substrings.
  result = @[]
  # Collect every piece produced by the `openArray[char]` iterator.
  for piece in splitWhitespace(s.toOa()):
    add(result, piece)
|
||||
|
||||
iterator split*(s: string, sep: Rune, maxsplit: int = -1): string =
  ## Splits the unicode string ``s`` into substrings using a single separator.
  ## Substrings are separated by the rune ``sep``.
  ## ``maxsplit`` is passed on unchanged to the shared splitting template.
  runnableExamples:
    import std/sequtils

    assert toSeq(split(";;hÃllo;this;is;an;;example;;;是", ";".runeAt(0))) ==
      @["", "", "hÃllo", "this", "is", "an", "", "example", "", "", "是"]

  splitCommon(s.toOa(), sep, maxsplit)
|
||||
|
||||
proc split*(s: string, seps: openArray[Rune] = unicodeSpaces,
    maxsplit: int = -1): seq[string] {.noSideEffect, inline.} =
  ## The same as the `split iterator <#split.i,string,openArray[Rune],int>`_,
  ## but is a proc that returns a sequence of substrings.
  result = @[]
  # Collect every piece produced by the `openArray[char]` iterator.
  for piece in split(s.toOa(), seps, maxsplit):
    add(result, piece)
|
||||
|
||||
proc split*(s: string, sep: Rune, maxsplit: int = -1): seq[string] {.
    noSideEffect, inline.} =
  ## The same as the `split iterator <#split.i,string,Rune,int>`_, but is a proc
  ## that returns a sequence of substrings.
  result = @[]
  # Collect every piece produced by the `openArray[char]` iterator.
  for piece in split(s.toOa(), sep, maxsplit):
    add(result, piece)
|
||||
|
||||
proc strip*(s: string, leading = true, trailing = true,
    runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect, inline.} =
  ## Returns ``s`` with leading and/or trailing ``runes`` removed.
  ##
  ## If ``leading`` is true (default), leading ``runes`` are stripped.
  ## If ``trailing`` is true (default), trailing ``runes`` are stripped.
  ## If both are false, the string is returned unchanged.
  runnableExamples:
    let a = "\táñyóng "
    doAssert a.strip == "áñyóng"
    doAssert a.strip(leading = false) == "\táñyóng"
    doAssert a.strip(trailing = false) == "áñyóng "
  result = strip(s.toOa(), leading, trailing, runes)
|
||||
|
||||
|
||||
proc align*(s: string, count: Natural, padding = ' '.Rune): string {.
    noSideEffect, inline.} =
  ## Aligns a unicode string ``s`` with ``padding``, so that it has a rune-length
  ## of ``count``.
  ##
  ## ``padding`` characters (by default spaces) are added before ``s`` resulting in
  ## right alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is
  ## returned unchanged. If you need to left align a string use the `alignLeft
  ## proc <#alignLeft,string,Natural>`_.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  runnableExamples:
    assert align("abc", 4) == " abc"
    assert align("a", 0) == "a"
    # Four runes padded to six: two leading pad characters.
    assert align("1232", 6) == "  1232"
    assert align("1232", 6, '#'.Rune) == "##1232"
    # Three runes padded to five: two leading pad characters.
    assert align("Åge", 5) == "  Åge"
    assert align("×", 4, '_'.Rune) == "___×"
  align(toOa(s), count, padding)
|
||||
|
||||
proc alignLeft*(s: string, count: Natural, padding = ' '.Rune): string {.
    noSideEffect, inline.} =
  ## Left-aligns a unicode string ``s`` with ``padding``, so that it has a
  ## rune-length of ``count``.
  ##
  ## ``padding`` characters (by default spaces) are added after ``s`` resulting in
  ## left alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is
  ## returned unchanged. If you need to right align a string use the `align
  ## proc <#align,string,Natural>`_.
  ##
  ## Forwards to the ``openArray[char]`` overload over the whole of ``s``.
  runnableExamples:
    assert alignLeft("abc", 4) == "abc "
    assert alignLeft("a", 0) == "a"
    # Four runes padded to six: two trailing pad characters.
    assert alignLeft("1232", 6) == "1232  "
    assert alignLeft("1232", 6, '#'.Rune) == "1232##"
    # Three runes padded to five: two trailing pad characters.
    assert alignLeft("Åge", 5) == "Åge  "
    assert alignLeft("×", 4, '_'.Rune) == "×___"
  alignLeft(toOa(s), count, padding)
|
||||
|
||||
Reference in New Issue
Block a user