mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-29 17:34:43 +00:00
* Improve ropes module Improve wording Update runnableExamples Add index bound check for `[]` Minor format changes * Update tests/stdlib/tropes.nim Run on C & JS target Test in VM Add tests for `[]`
398 lines
11 KiB
Nim
398 lines
11 KiB
Nim
#
|
|
#
|
|
# Nim's Runtime Library
|
|
# (c) Copyright 2010 Andreas Rumpf
|
|
#
|
|
# See the file "copying.txt", included in this
|
|
# distribution, for details about the copyright.
|
|
#
|
|
|
|
## This module contains support for a `rope`:idx: data type.
|
|
## Ropes can represent very long strings efficiently; in particular, concatenation
|
|
## is done in O(1) instead of O(n). They are essentially concatenation
|
|
## trees that are only flattened when converting to a native Nim
|
|
## string. The empty string is represented by `nil`. Ropes are immutable and
|
|
## subtrees can be shared without copying.
|
|
## Leaves can be cached for better memory efficiency at the cost of
|
|
## runtime efficiency.
|
|
|
|
include system/inclrtl
|
|
import std/streams
|
|
|
|
{.push debugger: off.} # the user does not want to trace a part
|
|
# of the standard library!
|
|
|
|
const
|
|
countCacheMisses = false
|
|
|
|
var
|
|
cacheEnabled = false
|
|
|
|
type
|
|
Rope* {.acyclic.} = ref object
|
|
## A rope data type. The empty rope is represented by `nil`.
|
|
left, right: Rope
|
|
length: int
|
|
data: string # not empty if a leaf
|
|
|
|
# Note that the left and right pointers are not needed for leafs.
|
|
# Leaves have relatively high memory overhead (~30 bytes on a 32
|
|
# bit machine) and we produce many of them. This is why we cache and
|
|
# share leafs across different rope trees.
|
|
# To cache them they are inserted in another tree, a splay tree for best
|
|
# performance. But for the caching tree we use the leaf's left and right
|
|
# pointers.
|
|
|
|
proc len*(a: Rope): int {.rtl, extern: "nro$1".} =
|
|
## The rope's length.
|
|
if a == nil: 0 else: a.length
|
|
|
|
proc newRope(): Rope = new(result)
|
|
proc newRope(data: string): Rope =
|
|
new(result)
|
|
result.length = len(data)
|
|
result.data = data
|
|
|
|
var
|
|
cache {.threadvar.}: Rope # the root of the cache tree
|
|
N {.threadvar.}: Rope # dummy rope needed for splay algorithm
|
|
|
|
when countCacheMisses:
|
|
var misses, hits: int
|
|
|
|
proc splay(s: string, tree: Rope, cmpres: var int): Rope =
|
|
var c: int
|
|
var t = tree
|
|
N.left = nil
|
|
N.right = nil # reset to nil
|
|
var le = N
|
|
var r = N
|
|
while true:
|
|
c = cmp(s, t.data)
|
|
if c < 0:
|
|
if (t.left != nil) and (s < t.left.data):
|
|
var y = t.left
|
|
t.left = y.right
|
|
y.right = t
|
|
t = y
|
|
if t.left == nil: break
|
|
r.left = t
|
|
r = t
|
|
t = t.left
|
|
elif c > 0:
|
|
if (t.right != nil) and (s > t.right.data):
|
|
var y = t.right
|
|
t.right = y.left
|
|
y.left = t
|
|
t = y
|
|
if t.right == nil: break
|
|
le.right = t
|
|
le = t
|
|
t = t.right
|
|
else:
|
|
break
|
|
cmpres = c
|
|
le.right = t.left
|
|
r.left = t.right
|
|
t.left = N.right
|
|
t.right = N.left
|
|
result = t
|
|
|
|
proc insertInCache(s: string, tree: Rope): Rope =
|
|
var t = tree
|
|
if t == nil:
|
|
result = newRope(s)
|
|
when countCacheMisses: inc(misses)
|
|
return
|
|
var cmp: int
|
|
t = splay(s, t, cmp)
|
|
if cmp == 0:
|
|
# We get here if it's already in the Tree
|
|
# Don't add it again
|
|
result = t
|
|
when countCacheMisses: inc(hits)
|
|
else:
|
|
when countCacheMisses: inc(misses)
|
|
result = newRope(s)
|
|
if cmp < 0:
|
|
result.left = t.left
|
|
result.right = t
|
|
t.left = nil
|
|
else:
|
|
# i > t.item:
|
|
result.right = t.right
|
|
result.left = t
|
|
t.right = nil
|
|
|
|
proc rope*(s: string = ""): Rope {.rtl, extern: "nro$1Str".} =
|
|
## Converts a string to a rope.
|
|
runnableExamples:
|
|
let r = rope("I'm a rope")
|
|
doAssert $r == "I'm a rope"
|
|
|
|
if s.len == 0:
|
|
result = nil
|
|
else:
|
|
when nimvm:
|
|
# No caching in VM context
|
|
result = newRope(s)
|
|
else:
|
|
if cacheEnabled:
|
|
result = insertInCache(s, cache)
|
|
cache = result
|
|
else:
|
|
result = newRope(s)
|
|
|
|
proc rope*(i: BiggestInt): Rope {.rtl, extern: "nro$1BiggestInt".} =
|
|
## Converts an int to a rope.
|
|
runnableExamples:
|
|
let r = rope(429)
|
|
doAssert $r == "429"
|
|
|
|
result = rope($i)
|
|
|
|
proc rope*(f: BiggestFloat): Rope {.rtl, extern: "nro$1BiggestFloat".} =
|
|
## Converts a float to a rope.
|
|
runnableExamples:
|
|
let r = rope(4.29)
|
|
doAssert $r == "4.29"
|
|
|
|
result = rope($f)
|
|
|
|
proc enableCache*() {.rtl, extern: "nro$1".} =
|
|
## Enables the caching of leaves. This reduces the memory footprint at
|
|
## the cost of runtime efficiency.
|
|
cacheEnabled = true
|
|
|
|
proc disableCache*() {.rtl, extern: "nro$1".} =
|
|
## The cache is discarded and disabled. The GC will reuse its used memory.
|
|
cache = nil
|
|
cacheEnabled = false
|
|
|
|
proc `&`*(a, b: Rope): Rope {.rtl, extern: "nroConcRopeRope".} =
|
|
## The concatenation operator for ropes.
|
|
runnableExamples:
|
|
let r = rope("Hello, ") & rope("Nim!")
|
|
doAssert $r == "Hello, Nim!"
|
|
|
|
if a == nil:
|
|
result = b
|
|
elif b == nil:
|
|
result = a
|
|
else:
|
|
result = newRope()
|
|
result.length = a.length + b.length
|
|
result.left = a
|
|
result.right = b
|
|
|
|
proc `&`*(a: Rope, b: string): Rope {.rtl, extern: "nroConcRopeStr".} =
|
|
## The concatenation operator for ropes.
|
|
runnableExamples:
|
|
let r = rope("Hello, ") & "Nim!"
|
|
doAssert $r == "Hello, Nim!"
|
|
|
|
result = a & rope(b)
|
|
|
|
proc `&`*(a: string, b: Rope): Rope {.rtl, extern: "nroConcStrRope".} =
|
|
## The concatenation operator for ropes.
|
|
runnableExamples:
|
|
let r = "Hello, " & rope("Nim!")
|
|
doAssert $r == "Hello, Nim!"
|
|
|
|
result = rope(a) & b
|
|
|
|
proc `&`*(a: openArray[Rope]): Rope {.rtl, extern: "nroConcOpenArray".} =
|
|
## The concatenation operator for an `openArray` of ropes.
|
|
runnableExamples:
|
|
let r = &[rope("Hello, "), rope("Nim"), rope("!")]
|
|
doAssert $r == "Hello, Nim!"
|
|
|
|
for item in a: result = result & item
|
|
|
|
proc add*(a: var Rope, b: Rope) {.rtl, extern: "nro$1Rope".} =
|
|
## Adds `b` to the rope `a`.
|
|
runnableExamples:
|
|
var r = rope("Hello, ")
|
|
r.add(rope("Nim!"))
|
|
doAssert $r == "Hello, Nim!"
|
|
|
|
a = a & b
|
|
|
|
proc add*(a: var Rope, b: string) {.rtl, extern: "nro$1Str".} =
|
|
## Adds `b` to the rope `a`.
|
|
runnableExamples:
|
|
var r = rope("Hello, ")
|
|
r.add("Nim!")
|
|
doAssert $r == "Hello, Nim!"
|
|
|
|
a = a & b
|
|
|
|
proc `[]`*(r: Rope, i: int): char {.rtl, extern: "nroCharAt".} =
|
|
## Returns the character at position `i` in the rope `r`. This is quite
|
|
## expensive! Worst-case: O(n). If `i >= r.len or i < 0`, `\0` is returned.
|
|
runnableExamples:
|
|
let r = rope("Hello, Nim!")
|
|
|
|
doAssert r[0] == 'H'
|
|
doAssert r[7] == 'N'
|
|
doAssert r[22] == '\0'
|
|
|
|
var x = r
|
|
var j = i
|
|
if x == nil or i < 0 or i >= r.len: return
|
|
while true:
|
|
if x != nil and x.data.len > 0:
|
|
# leaf
|
|
return x.data[j]
|
|
else:
|
|
if x.left.length > j:
|
|
x = x.left
|
|
else:
|
|
dec(j, x.left.length)
|
|
x = x.right
|
|
|
|
iterator leaves*(r: Rope): string =
|
|
## Iterates over any leaf string in the rope `r`.
|
|
runnableExamples:
|
|
let r = rope("Hello") & rope(", Nim!")
|
|
let s = ["Hello", ", Nim!"]
|
|
var index = 0
|
|
for leave in r.leaves:
|
|
doAssert leave == s[index]
|
|
inc(index)
|
|
|
|
if r != nil:
|
|
var stack = @[r]
|
|
while stack.len > 0:
|
|
var it = stack.pop
|
|
while it.left != nil:
|
|
assert(it.right != nil)
|
|
stack.add(it.right)
|
|
it = it.left
|
|
assert(it != nil)
|
|
yield it.data
|
|
|
|
iterator items*(r: Rope): char =
|
|
## Iterates over any character in the rope `r`.
|
|
for s in leaves(r):
|
|
for c in items(s): yield c
|
|
|
|
proc write*(f: File, r: Rope) {.rtl, extern: "nro$1".} =
|
|
## Writes a rope to a file.
|
|
for s in leaves(r): write(f, s)
|
|
|
|
proc write*(s: Stream, r: Rope) {.rtl, extern: "nroWriteStream".} =
|
|
## Writes a rope to a stream.
|
|
for rs in leaves(r): write(s, rs)
|
|
|
|
proc `$`*(r: Rope): string {.rtl, extern: "nroToString".} =
|
|
## Converts a rope back to a string.
|
|
result = newStringOfCap(r.len)
|
|
for s in leaves(r): add(result, s)
|
|
|
|
proc `%`*(frmt: string, args: openArray[Rope]): Rope {.rtl, extern: "nroFormat".} =
|
|
## `%` substitution operator for ropes. Does not support the `$identifier`
|
|
## nor `${identifier}` notations.
|
|
runnableExamples:
|
|
let r1 = "$1 $2 $3" % [rope("Nim"), rope("is"), rope("a great language")]
|
|
doAssert $r1 == "Nim is a great language"
|
|
|
|
let r2 = "$# $# $#" % [rope("Nim"), rope("is"), rope("a great language")]
|
|
doAssert $r2 == "Nim is a great language"
|
|
|
|
let r3 = "${1} ${2} ${3}" % [rope("Nim"), rope("is"), rope("a great language")]
|
|
doAssert $r3 == "Nim is a great language"
|
|
|
|
var i = 0
|
|
var length = len(frmt)
|
|
result = nil
|
|
var num = 0
|
|
while i < length:
|
|
if frmt[i] == '$':
|
|
inc(i)
|
|
case frmt[i]
|
|
of '$':
|
|
add(result, "$")
|
|
inc(i)
|
|
of '#':
|
|
inc(i)
|
|
add(result, args[num])
|
|
inc(num)
|
|
of '0'..'9':
|
|
var j = 0
|
|
while true:
|
|
j = j * 10 + ord(frmt[i]) - ord('0')
|
|
inc(i)
|
|
if i >= frmt.len or frmt[i] notin {'0'..'9'}: break
|
|
add(result, args[j-1])
|
|
of '{':
|
|
inc(i)
|
|
var j = 0
|
|
while frmt[i] in {'0'..'9'}:
|
|
j = j * 10 + ord(frmt[i]) - ord('0')
|
|
inc(i)
|
|
if frmt[i] == '}': inc(i)
|
|
else: raise newException(ValueError, "invalid format string")
|
|
|
|
add(result, args[j-1])
|
|
else: raise newException(ValueError, "invalid format string")
|
|
var start = i
|
|
while i < length:
|
|
if frmt[i] != '$': inc(i)
|
|
else: break
|
|
if i - 1 >= start:
|
|
add(result, substr(frmt, start, i - 1))
|
|
|
|
proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {.rtl, extern: "nro$1".} =
|
|
## Shortcut for `add(c, frmt % args)`.
|
|
runnableExamples:
|
|
var r = rope("Dash: ")
|
|
r.addf "$1 $2 $3", [rope("Nim"), rope("is"), rope("a great language")]
|
|
doAssert $r == "Dash: Nim is a great language"
|
|
|
|
add(c, frmt % args)
|
|
|
|
when not defined(js) and not defined(nimscript):
|
|
const
|
|
bufSize = 1024 # 1 KB is reasonable
|
|
|
|
proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} =
|
|
## Returns true if the contents of the file `f` equal `r`.
|
|
var
|
|
buf: array[bufSize, char]
|
|
bpos = buf.len
|
|
blen = buf.len
|
|
|
|
for s in leaves(r):
|
|
var spos = 0
|
|
let slen = s.len
|
|
while spos < slen:
|
|
if bpos == blen:
|
|
# Read more data
|
|
bpos = 0
|
|
blen = readBuffer(f, addr(buf[0]), buf.len)
|
|
if blen == 0: # no more data in file
|
|
return false
|
|
let n = min(blen - bpos, slen - spos)
|
|
# TODO: There's gotta be a better way of comparing here...
|
|
if not equalMem(addr(buf[bpos]),
|
|
cast[pointer](cast[int](cstring(s)) + spos), n):
|
|
return false
|
|
spos += n
|
|
bpos += n
|
|
|
|
result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all
|
|
|
|
proc equalsFile*(r: Rope, filename: string): bool {.rtl, extern: "nro$1Str".} =
|
|
## Returns true if the contents of the file `f` equal `r`. If `f` does not
|
|
## exist, false is returned.
|
|
var f: File
|
|
result = open(f, filename)
|
|
if result:
|
|
result = equalsFile(r, f)
|
|
close(f)
|
|
|
|
new(N) # init dummy node for splay algorithm
|
|
|
|
{.pop.}
|