various bugfixes for generics; added generic sort proc

2026-05-12 16:41:11 +00:00 · 2011-03-03 02:01:22 +01:00
parent f93ca8e42a
commit e424e13bd9
18 changed files with 392 additions and 113 deletions
--- a/lib/impure/re.nim
+++ b/lib/impure/re.nim
@@ -62,7 +62,7 @@ proc finalizeRegEx(x: TRegEx) =

 proc re*(s: string, flags = {reExtended}): TRegEx =
  ## Constructor of regular expressions. Note that Nimrod's
-  ## extended raw string literals supports this syntax ``re"[abc]"`` as
+  ## extended raw string literals support this syntax ``re"[abc]"`` as
  ## a short form for ``re(r"[abc]")``.
  new(result, finalizeRegEx)
  result.h = rawCompile(s, cast[cint](flags))
@@ -152,8 +152,12 @@ proc find*(s: string, pattern: TRegEx, matches: var openarray[string],
 proc find*(s: string, pattern: TRegEx, start = 0): int =
  ## returns the starting position of ``pattern`` in ``s``. If it does not
  ## match, -1 is returned.
-  var matches: array[0..maxSubpatterns-1, string]
-  result = find(s, pattern, matches, start)
+  var
+    rawMatches: array[0..3 - 1, cint]
+    res = pcre.Exec(pattern.h, nil, s, len(s), start, 0'i32,
+      cast[ptr cint](addr(rawMatches)), 3)
+  if res < 0'i32: return res
+  return rawMatches[0]
  
 iterator findAll*(s: string, pattern: TRegEx, start = 0): string = 
  ## yields all matching captures of pattern in `s`.
--- a/lib/pure/algorithm.nim
+++ b/lib/pure/algorithm.nim
@@ -0,0 +1,101 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2010 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## This module implements some common generic algorithms.
+
+type
+  TSortOrder* = enum   ## sort order
+    Descending, Ascending 
+
+proc `*`*(x: int, order: TSortOrder): int {.inline.} = 
+  ## flips `x` if ``order == Descending``;
+  ## if ``order == Ascending`` then `x` is returned.
+  ## `x` is supposed to be the result of a comparator.
+  var y = order.ord - 1
+  result = (x xor y) - y
+
+proc reverse*[T](a: var openArray[T], first, last: int) =
+  ## reverses the array ``a[first..last]``.
+  var x = first
+  var y = last
+  while x < y:
+    swap(a[x], a[y])
+    dec(y)
+    inc(x)
+
+proc reverse*[T](a: var openArray[T]) =
+  ## reverses the array `a`.
+  reverse(a, 0, a.high)
+
+const
+  onlySafeCode = false
+
+proc merge[T](a, b: var openArray[T], lo, m, hi: int, 
+              cmp: proc (x, y: T): int, order: TSortOrder) =
+  template `<-` (a, b: expr) = 
+    when onlySafeCode:
+      a = b
+    else:
+      copyMem(addr(a), addr(b), sizeof(T))
+  # optimization: If max(left) <= min(right) there is nothing to do!
+  # 1 2 3 4  ## 5 6 7 8
+  # -> O(n) for sorted arrays.
+  # On random data this safes up to 40% of merge calls
+  if cmp(a[m], a[m+1]) * order <= 0: return
+  var j = lo
+  # copy a[j..m] into b:
+  assert j <= m
+  when onlySafeCode:
+    var bb = 0
+    while j <= m:
+      b[bb] = a[j]
+      inc(bb)
+      inc(j)
+  else:
+    CopyMem(addr(b[0]), addr(a[j]), sizeof(T)*(m-j+1))
+    j = m+1
+  var i = 0
+  var k = lo
+  # copy proper element back:
+  while k < j and j <= hi:
+    if cmp(b[i], a[j]) * order <= 0:
+      a[k] <- b[i]
+      inc(i)
+    else:
+      a[k] <- a[j]
+      inc(j)
+    inc(k)
+  # copy rest of b:
+  when onlySafeCode:
+    while k < j:
+      a[k] = b[i]
+      inc(k)
+      inc(i)
+  else:
+    if k < j: copyMem(addr(a[k]), addr(b[i]), sizeof(T)*(j-k))
+
+proc sort*[T](a: var openArray[T], 
+              cmp: proc (x, y: T): int = cmp,
+              order = TSortOrder.Ascending) =
+  ## Default Nimrod sort. The sorting is guaranteed to be stable and 
+  ## the worst case is guaranteed to be O(n log n).
+  ## The current implementation uses an iterative
+  ## mergesort to achieve this. It uses a temporary sequence of 
+  ## length ``a.len div 2``.
+  var n = a.len
+  var b: seq[T]
+  newSeq(b, n div 2)
+  var s = 1
+  while s < n:
+    var m = n-1-s
+    while m >= 0:
+      merge(a, b, max(m-s+1, 0), m, m+s, cmp, order)
+      dec(m, s*2)
+    s = s*2
+
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -97,14 +97,15 @@ proc normalize*(s: string): string {.noSideEffect, procvar,
      add result, s[i]

 proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
-  rtl, extern: "nsuCmpIgnoreCase".} =
+  rtl, extern: "nsuCmpIgnoreCase", procvar.} =
  ## Compares two strings in a case insensitive manner. Returns:
  ##
  ## | 0 iff a == b
  ## | < 0 iff a < b
  ## | > 0 iff a > b
-  var i = 0
-  while i < a.len and i < b.len:
+  var i = 0
+  var m = min(a.len, b.len)
+  while i < m:
    result = ord(toLower(a[i])) - ord(toLower(b[i]))
    if result != 0: return
    inc(i)
@@ -114,7 +115,7 @@ proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
                                       # thus we compile without checks here

 proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect,
-  rtl, extern: "nsuCmpIgnoreStyle".} =
+  rtl, extern: "nsuCmpIgnoreStyle", procvar.} =
  ## Compares two strings normalized (i.e. case and
  ## underscores do not matter). Returns:
  ##
--- a/lib/pure/unicode.nim
+++ b/lib/pure/unicode.nim
@@ -1075,7 +1075,7 @@ proc binarySearch(c: irune, tab: openArray[iRune], len, stride: int): int =
    return t
  return -1

-proc toLower*(c: TRune): TRune {.rtl, extern: "nuc$1".} = 
+proc toLower*(c: TRune): TRune {.rtl, extern: "nuc$1", procvar.} = 
  ## Converts `c` into lower case. This works for any Unicode character.
  ## If possible, prefer `toLower` over `toUpper`. 
  var c = irune(c)
@@ -1087,7 +1087,7 @@ proc toLower*(c: TRune): TRune {.rtl, extern: "nuc$1".} =
    return TRune(c + toLowerSinglets[p+1] - 500)
  return TRune(c)

-proc toUpper*(c: TRune): TRune {.rtl, extern: "nuc$1".} = 
+proc toUpper*(c: TRune): TRune {.rtl, extern: "nuc$1", procvar.} = 
  ## Converts `c` into upper case. This works for any Unicode character.
  ## If possible, prefer `toLower` over `toUpper`. 
  var c = irune(c)
@@ -1099,14 +1099,14 @@ proc toUpper*(c: TRune): TRune {.rtl, extern: "nuc$1".} =
    return TRune(c + toUpperSinglets[p+1] - 500)
  return TRune(c)

-proc toTitle*(c: TRune): TRune {.rtl, extern: "nuc$1".} = 
+proc toTitle*(c: TRune): TRune {.rtl, extern: "nuc$1", procvar.} = 
  var c = irune(c)
  var p = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2)
  if p >= 0 and c == toTitleSinglets[p]:
    return TRune(c + toTitleSinglets[p+1] - 500)
  return TRune(c)

-proc isLower*(c: TRune): bool {.rtl, extern: "nuc$1".} = 
+proc isLower*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} = 
  ## returns true iff `c` is a lower case Unicode character
  ## If possible, prefer `isLower` over `isUpper`. 
  var c = irune(c)
@@ -1118,7 +1118,7 @@ proc isLower*(c: TRune): bool {.rtl, extern: "nuc$1".} =
  if p >= 0 and c == toUpperSinglets[p]:
    return true

-proc isUpper*(c: TRune): bool {.rtl, extern: "nuc$1".} = 
+proc isUpper*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} = 
  ## returns true iff `c` is a upper case Unicode character
  ## If possible, prefer `isLower` over `isUpper`. 
  var c = irune(c)
@@ -1130,7 +1130,7 @@ proc isUpper*(c: TRune): bool {.rtl, extern: "nuc$1".} =
  if p >= 0 and c == toLowerSinglets[p]:
    return true

-proc isAlpha*(c: TRune): bool {.rtl, extern: "nuc$1".} = 
+proc isAlpha*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} = 
  ## returns true iff `c` is an *alpha* Unicode character (i.e. a letter)
  if isUpper(c) or isLower(c): 
    return true
@@ -1142,10 +1142,10 @@ proc isAlpha*(c: TRune): bool {.rtl, extern: "nuc$1".} =
  if p >= 0 and c == alphaSinglets[p]:
    return true
  
-proc isTitle*(c: TRune): bool {.rtl, extern: "nuc$1".} = 
+proc isTitle*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} = 
  return isUpper(c) and isLower(c)

-proc isWhiteSpace*(c: TRune): bool {.rtl, extern: "nuc$1".} = 
+proc isWhiteSpace*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} = 
  ## returns true iff `c` is a Unicode whitespace character
  var c = irune(c)
  var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2)
@@ -1161,7 +1161,7 @@ iterator runes*(s: string): TRune =
    fastRuneAt(s, i, result, true)
    yield result

-proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1".} = 
+proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1", procvar.} = 
  ## compares two UTF8 strings and ignores the case. Returns:
  ##
  ## | 0 iff a == b
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -623,7 +623,7 @@ template `not_in` * (x, y: expr): expr = not contains(y, x)
 proc `is` *[T, S](x: T, y: S): bool {.magic: "Is", noSideEffect.}
 template `is_not` *(x, y: expr): expr = not (x is y)

-proc cmp*[T, S: typeDesc](x: T, y: S): int =
+proc cmp*[T, S: typeDesc](x: T, y: S): int {.procvar.} =
  ## Generic compare proc. Returns a value < 0 iff x < y, a value > 0 iff x > y
  ## and 0 iff x == y. This is useful for writing generic algorithms without
  ## performance loss. This generic implementation uses the `==` and `<`
@@ -632,7 +632,7 @@ proc cmp*[T, S: typeDesc](x: T, y: S): int =
  if x < y: return -1
  return 1

-proc cmp*(x, y: string): int {.noSideEffect.}
+proc cmp*(x, y: string): int {.noSideEffect, procvar.}
  ## Compare proc for strings. More efficient than the generic version.

 proc `@` * [IDX, T](a: array[IDX, T]): seq[T] {.
@@ -714,7 +714,8 @@ const

  cpuEndian* {.magic: "CpuEndian"}: TEndian = littleEndian
    ## is the endianness of the target CPU. This is a valuable piece of
-    ## information for low-level code only. This works thanks to compiler magic.
+    ## information for low-level code only. This works thanks to compiler
+    ## magic.
    
  hostOS* {.magic: "HostOS"}: string = ""
    ## a string that describes the host operating system. Possible values:
@@ -1307,7 +1308,7 @@ proc echo*[Ty](x: openarray[Ty]) {.magic: "Echo".}
  ## available for the ECMAScript target too!

 template newException*(exceptn, message: expr): expr = 
-  ## creates an exception object of type "exceptn" and sets its ``msg`` field
+  ## creates an exception object of type ``exceptn`` and sets its ``msg`` field
  ## to `message`. Returns the new exception object. 
  block: # open a new scope
    var
@@ -1365,7 +1366,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
  include "system/ansi_c"

  proc cmp(x, y: string): int =
-    return int(c_strcmp(x, y))
+    result = int(c_strcmp(x, y))

  const pccHack = if defined(pcc): "_" else: "" # Hack for PCC
  when defined(windows):
@@ -1419,8 +1420,9 @@ when not defined(EcmaScript) and not defined(NimrodVM):
      ## stream* is a good idea, but since it is named ``stderr`` there are few
      ## programs out there that distinguish properly between ``stdout`` and
      ## ``stderr``. So, that's what you get if you don't name your variables
-      ## appropriately. It also annoys people if redirection via ``>output.txt``
-      ## does not work because the program writes to ``stderr``.
+      ## appropriately. It also annoys people if redirection
+      ## via ``>output.txt`` does not work because the program writes
+      ## to ``stderr``.

  proc Open*(f: var TFile, filename: string,
             mode: TFileMode = fmRead, bufSize: int = -1): Bool