mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-29 17:34:43 +00:00
Optimize unicode.reversed
Runs about 18 times faster: - combining characters with boolean logic instead of binary search - No more temporary sequence - Optimize for ASCII characters
This commit is contained in:
@@ -1102,13 +1102,6 @@ const
|
||||
0x01f1, 501, #
|
||||
0x01f3, 499] #
|
||||
|
||||
combiningRanges = [
|
||||
0x0300, 0x036f,
|
||||
0x1ab0, 0x1aff,
|
||||
0x1dc0, 0x1dff,
|
||||
0x20d0, 0x20ff,
|
||||
0xfe20, 0xfe2f]
|
||||
|
||||
proc binarySearch(c: RuneImpl, tab: openArray[RuneImpl], len, stride: int): int =
|
||||
var n = len
|
||||
var t = 0
|
||||
@@ -1204,9 +1197,13 @@ proc isWhiteSpace*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
|
||||
proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
|
||||
## returns true iff `c` is a Unicode combining character
|
||||
var c = RuneImpl(c)
|
||||
var p = binarySearch(c, combiningRanges, len(combiningRanges) div 2, 2)
|
||||
if p >= 0 and c >= combiningRanges[p] and c <= combiningRanges[p+1]:
|
||||
return true
|
||||
|
||||
# Optimized to return false immediately for ASCII
|
||||
return c >= 0x0300 and (c <= 0x036f or
|
||||
(c >= 0x1ab0 and c <= 0x1aff) or
|
||||
(c >= 0x1dc0 and c <= 0x1dff) or
|
||||
(c >= 0x20d0 and c <= 0x20ff) or
|
||||
(c >= 0xfe20 and c <= 0xfe2f))
|
||||
|
||||
iterator runes*(s: string): Rune =
|
||||
## iterates over any unicode character of the string `s`.
|
||||
@@ -1243,15 +1240,30 @@ proc reversed*(s: string): string =
|
||||
## assert reversed("先秦兩漢") == "漢兩秦先"
|
||||
## assert reversed("as⃝df̅") == "f̅ds⃝a"
|
||||
## assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
|
||||
result = newStringOfCap(len(s))
|
||||
var tmp: seq[Rune] = @[]
|
||||
for r in runes(s):
|
||||
if isCombining(r):
|
||||
tmp.insert(r, high(tmp))
|
||||
else:
|
||||
tmp.add(r)
|
||||
for i in countdown(high(tmp), 0):
|
||||
result.add(toUTF8(tmp[i]))
|
||||
var
|
||||
i = 0
|
||||
lastI = 0
|
||||
newPos = len(s) - 1
|
||||
blockPos = 0
|
||||
r: Rune
|
||||
|
||||
template reverseUntil(pos): stmt =
|
||||
var j = pos - 1
|
||||
while j > blockPos:
|
||||
result[newPos] = s[j]
|
||||
dec j
|
||||
dec newPos
|
||||
blockPos = pos - 1
|
||||
|
||||
result = newString(len(s))
|
||||
|
||||
while i < len(s):
|
||||
lastI = i
|
||||
fastRuneAt(s, i, r, true)
|
||||
if not isCombining(r):
|
||||
reverseUntil(lastI)
|
||||
|
||||
reverseUntil(len(s))
|
||||
|
||||
when isMainModule:
|
||||
let
|
||||
|
||||
Reference in New Issue
Block a user