Optimize unicode.reversed

Runs about 18 times faster:
- combining characters with boolean logic instead of binary search
- No more temporary sequence
- Optimize for ASCII characters
This commit is contained in:
def
2015-01-15 23:11:02 +01:00
parent 0a82b6eb62
commit ae7ca46a09

View File

@@ -1102,13 +1102,6 @@ const
0x01f1, 501, #
0x01f3, 499] #
combiningRanges = [
0x0300, 0x036f,
0x1ab0, 0x1aff,
0x1dc0, 0x1dff,
0x20d0, 0x20ff,
0xfe20, 0xfe2f]
proc binarySearch(c: RuneImpl, tab: openArray[RuneImpl], len, stride: int): int =
var n = len
var t = 0
@@ -1204,9 +1197,13 @@ proc isWhiteSpace*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
## returns true iff `c` is a Unicode combining character
var c = RuneImpl(c)
var p = binarySearch(c, combiningRanges, len(combiningRanges) div 2, 2)
if p >= 0 and c >= combiningRanges[p] and c <= combiningRanges[p+1]:
return true
# Optimized to return false immediately for ASCII
return c >= 0x0300 and (c <= 0x036f or
(c >= 0x1ab0 and c <= 0x1aff) or
(c >= 0x1dc0 and c <= 0x1dff) or
(c >= 0x20d0 and c <= 0x20ff) or
(c >= 0xfe20 and c <= 0xfe2f))
iterator runes*(s: string): Rune =
## iterates over any unicode character of the string `s`.
@@ -1243,15 +1240,30 @@ proc reversed*(s: string): string =
## assert reversed("先秦兩漢") == "漢兩秦先"
## assert reversed("as⃝df̅") == "f̅ds⃝a"
## assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
result = newStringOfCap(len(s))
var tmp: seq[Rune] = @[]
for r in runes(s):
if isCombining(r):
tmp.insert(r, high(tmp))
else:
tmp.add(r)
for i in countdown(high(tmp), 0):
result.add(toUTF8(tmp[i]))
var
i = 0
lastI = 0
newPos = len(s) - 1
blockPos = 0
r: Rune
template reverseUntil(pos): stmt =
var j = pos - 1
while j > blockPos:
result[newPos] = s[j]
dec j
dec newPos
blockPos = pos - 1
result = newString(len(s))
while i < len(s):
lastI = i
fastRuneAt(s, i, r, true)
if not isCombining(r):
reverseUntil(lastI)
reverseUntil(len(s))
when isMainModule:
let