faster CountTable sort(), optional SortOrder (#11010)

* use existing sort for CountTable, and
add SortOrder options to CountTable, OrderedTable sort(s)

* add some tests, runnables, etc.

* fix runnable imports
This commit is contained in:
Andy Davidoff
2019-04-16 03:42:54 -04:00
committed by Andreas Rumpf
parent 374a85bb9c
commit 01f09567c4
2 changed files with 60 additions and 42 deletions

View File

@@ -216,7 +216,7 @@
## * `hashes module<hashes.html>`_ for helper functions for hashing
import hashes, math
import hashes, math, algorithm
include "system/inclrtl"
@@ -1467,7 +1467,7 @@ proc clear*[A, B](t: var OrderedTable[A, B]) =
t.first = -1
t.last = -1
proc sort*[A, B](t: var OrderedTable[A, B], cmp: proc (x,y: (A, B)): int) =
proc sort*[A, B](t: var OrderedTable[A, B], cmp: proc (x,y: (A, B)): int, order = SortOrder.Ascending) =
## Sorts ``t`` according to the function ``cmp``.
##
## This modifies the internal list
@@ -1475,12 +1475,15 @@ proc sort*[A, B](t: var OrderedTable[A, B], cmp: proc (x,y: (A, B)): int) =
## call but key lookup and insertions remain possible after ``sort`` (in
## contrast to the `sort proc<#sort,CountTable[A]>`_ for count tables).
runnableExamples:
import algorithm
var a = initOrderedTable[char, int]()
for i, c in "cab":
a[c] = 10*i
doAssert a == {'c': 0, 'a': 10, 'b': 20}.toOrderedTable
a.sort(system.cmp)
doAssert a == {'a': 10, 'b': 20, 'c': 0}.toOrderedTable
a.sort(system.cmp, order=SortOrder.Descending)
doAssert a == {'c': 0, 'b': 20, 'a': 10}.toOrderedTable
var list = t.first
var
@@ -1508,7 +1511,7 @@ proc sort*[A, B](t: var OrderedTable[A, B], cmp: proc (x,y: (A, B)): int) =
elif qsize == 0 or q < 0:
e = p; p = t.data[p].next; dec(psize)
elif cmp((t.data[p].key, t.data[p].val),
(t.data[q].key, t.data[q].val)) <= 0:
(t.data[q].key, t.data[q].val)) * order <= 0:
e = p; p = t.data[p].next; dec(psize)
else:
e = q; q = t.data[q].next; dec(qsize)
@@ -1875,7 +1878,7 @@ proc clear*[A, B](t: var OrderedTableRef[A, B]) =
doAssert len(a) == 0
clear(t[])
proc sort*[A, B](t: OrderedTableRef[A, B], cmp: proc (x,y: (A, B)): int) =
proc sort*[A, B](t: OrderedTableRef[A, B], cmp: proc (x,y: (A, B)): int, order = SortOrder.Ascending) =
## Sorts ``t`` according to the function ``cmp``.
##
## This modifies the internal list
@@ -1883,13 +1886,16 @@ proc sort*[A, B](t: OrderedTableRef[A, B], cmp: proc (x,y: (A, B)): int) =
## call but key lookup and insertions remain possible after ``sort`` (in
## contrast to the `sort proc<#sort,CountTableRef[A]>`_ for count tables).
runnableExamples:
import algorithm
var a = newOrderedTable[char, int]()
for i, c in "cab":
a[c] = 10*i
doAssert a == {'c': 0, 'a': 10, 'b': 20}.newOrderedTable
a.sort(system.cmp)
doAssert a == {'a': 10, 'b': 20, 'c': 0}.newOrderedTable
t[].sort(cmp)
a.sort(system.cmp, order=SortOrder.Descending)
doAssert a == {'c': 0, 'b': 20, 'a': 10}.newOrderedTable
t[].sort(cmp, order=order)
proc `$`*[A, B](t: OrderedTableRef[A, B]): string =
## The ``$`` operator for hash tables. Used internally when calling `echo`
@@ -2199,30 +2205,27 @@ proc clear*[A](t: var CountTable[A]) =
## Resets the table so that it is empty.
clearImpl()
proc sort*[A](t: var CountTable[A]) =
## Sorts the count table so that the entry with the highest counter comes
## first.
func ctCmp[T](a, b: tuple[key: T, val: int]): int =
result = system.cmp(a.val, b.val)
proc sort*[A](t: var CountTable[A], order = SortOrder.Descending) =
## Sorts the count table so that, by default, the entry with the
## highest counter comes first.
##
## **This is destructive! You must not modify ``t`` afterwards!**
##
## You can use the iterators `pairs<#pairs.i,CountTable[A]>`_,
## `keys<#keys.i,CountTable[A]>`_, and `values<#values.i,CountTable[A]>`_
## to iterate over ``t`` in the sorted order.
# we use shellsort here; fast enough and simple
var h = 1
while true:
h = 3 * h + 1
if h >= high(t.data): break
while true:
h = h div 3
for i in countup(h, high(t.data)):
var j = i
while t.data[j-h].val <= t.data[j].val:
swap(t.data[j], t.data[j-h])
j = j-h
if j < h: break
if h == 1: break
runnableExamples:
import algorithm, sequtils
var a = toCountTable("abracadabra")
doAssert a == "aaaaabbrrcd".toCountTable
a.sort()
doAssert toSeq(a.values) == @[5, 2, 2, 1, 1]
a.sort(SortOrder.Ascending)
doAssert toSeq(a.values) == @[1, 1, 2, 2, 5]
t.data.sort(cmp=ctCmp, order=order)
proc merge*[A](s: var CountTable[A], t: CountTable[A]) =
## Merges the second table into the first one (must be declared as `var`).
@@ -2467,16 +2470,16 @@ proc clear*[A](t: CountTableRef[A]) =
## Resets the table so that it is empty.
clearImpl()
proc sort*[A](t: CountTableRef[A]) =
## Sorts the count table so that the entry with the highest counter comes
## first.
proc sort*[A](t: CountTableRef[A], order = SortOrder.Descending) =
## Sorts the count table so that, by default, the entry with the
## highest counter comes first.
##
## **This is destructive! You must not modify `t` afterwards!**
##
## You can use the iterators `pairs<#pairs.i,CountTableRef[A]>`_,
## `keys<#keys.i,CountTableRef[A]>`_, and `values<#values.i,CountTableRef[A]>`_
## to iterate over ``t`` in the sorted order.
t[].sort
t[].sort(order=order)
proc merge*[A](s, t: CountTableRef[A]) =
## Merges the second table into the first one.

View File

@@ -8,7 +8,7 @@ And we get here
'''
joinable: false
"""
import hashes, sequtils, tables
import hashes, sequtils, tables, algorithm
# test should not be joined because it takes too long.
block tableadds:
@@ -274,22 +274,30 @@ block tablesref:
block countTableTest1:
var s = data.toTable
var t = newCountTable[string]()
for k in s.keys: t.inc(k)
for k in t.keys: assert t[k] == 1
t.inc("90", 3)
t.inc("12", 2)
t.inc("34", 1)
var r = newCountTable[string]()
for x in [t, r]:
for k in s.keys:
x.inc(k)
assert x[k] == 1
x.inc("90", 3)
x.inc("12", 2)
x.inc("34", 1)
assert t.largest()[0] == "90"
t.sort()
var i = 0
for k, v in t.pairs:
case i
of 0: assert k == "90" and v == 4
of 1: assert k == "12" and v == 3
of 2: assert k == "34" and v == 2
else: break
inc i
r.sort(SortOrder.Ascending)
var ps1 = toSeq t.pairs
var ps2 = toSeq r.pairs
ps2.reverse()
for ps in [ps1, ps2]:
var i = 0
for (k, v) in ps:
case i
of 0: assert k == "90" and v == 4
of 1: assert k == "12" and v == 3
of 2: assert k == "34" and v == 2
else: break
inc i
block SyntaxTest:
var x = newTable[int, string]({:})
@@ -308,13 +316,20 @@ block tablesref:
var t = newOrderedTable[string, int](2)
for key, val in items(data): t[key] = val
for key, val in items(data): assert t[key] == val
t.sort(proc (x, y: tuple[key: string, val: int]): int = cmp(x.key, y.key))
proc cmper(x, y: tuple[key: string, val: int]): int = cmp(x.key, y.key)
t.sort(cmper)
var i = 0
# `pairs` needs to yield in sorted order:
for key, val in pairs(t):
doAssert key == sorteddata[i][0]
doAssert val == sorteddata[i][1]
inc(i)
t.sort(cmper, order=SortOrder.Descending)
i = 0
for key, val in pairs(t):
doAssert key == sorteddata[high(data)-i][0]
doAssert val == sorteddata[high(data)-i][1]
inc(i)
# check that lookup still works:
for key, val in pairs(t):