various bugfixes for generics; added generic sort proc

This commit is contained in:
Araq
2011-03-03 02:01:22 +01:00
parent f93ca8e42a
commit e424e13bd9
18 changed files with 392 additions and 113 deletions

View File

@@ -62,7 +62,7 @@ proc finalizeRegEx(x: TRegEx) =
proc re*(s: string, flags = {reExtended}): TRegEx =
## Constructor of regular expressions. Note that Nimrod's
## extended raw string literals supports this syntax ``re"[abc]"`` as
## extended raw string literals support this syntax ``re"[abc]"`` as
## a short form for ``re(r"[abc]")``.
new(result, finalizeRegEx)
result.h = rawCompile(s, cast[cint](flags))
@@ -152,8 +152,12 @@ proc find*(s: string, pattern: TRegEx, matches: var openarray[string],
proc find*(s: string, pattern: TRegEx, start = 0): int =
## returns the starting position of ``pattern`` in ``s``. If it does not
## match, -1 is returned.
var matches: array[0..maxSubpatterns-1, string]
result = find(s, pattern, matches, start)
var
rawMatches: array[0..3 - 1, cint]
res = pcre.Exec(pattern.h, nil, s, len(s), start, 0'i32,
cast[ptr cint](addr(rawMatches)), 3)
if res < 0'i32: return res
return rawMatches[0]
iterator findAll*(s: string, pattern: TRegEx, start = 0): string =
## yields all matching captures of pattern in `s`.

101
lib/pure/algorithm.nim Normal file
View File

@@ -0,0 +1,101 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2010 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## This module implements some common generic algorithms.
type
TSortOrder* = enum ## sort order
Descending, Ascending
proc `*`*(x: int, order: TSortOrder): int {.inline.} =
## flips `x` if ``order == Descending``;
## if ``order == Ascending`` then `x` is returned.
## `x` is supposed to be the result of a comparator.
var y = order.ord - 1
result = (x xor y) - y
proc reverse*[T](a: var openArray[T], first, last: int) =
## reverses the array ``a[first..last]``.
var x = first
var y = last
while x < y:
swap(a[x], a[y])
dec(y)
inc(x)
proc reverse*[T](a: var openArray[T]) =
## reverses the array `a`.
reverse(a, 0, a.high)
const
onlySafeCode = false
proc merge[T](a, b: var openArray[T], lo, m, hi: int,
cmp: proc (x, y: T): int, order: TSortOrder) =
template `<-` (a, b: expr) =
when onlySafeCode:
a = b
else:
copyMem(addr(a), addr(b), sizeof(T))
# optimization: If max(left) <= min(right) there is nothing to do!
# 1 2 3 4 ## 5 6 7 8
# -> O(n) for sorted arrays.
# On random data this safes up to 40% of merge calls
if cmp(a[m], a[m+1]) * order <= 0: return
var j = lo
# copy a[j..m] into b:
assert j <= m
when onlySafeCode:
var bb = 0
while j <= m:
b[bb] = a[j]
inc(bb)
inc(j)
else:
CopyMem(addr(b[0]), addr(a[j]), sizeof(T)*(m-j+1))
j = m+1
var i = 0
var k = lo
# copy proper element back:
while k < j and j <= hi:
if cmp(b[i], a[j]) * order <= 0:
a[k] <- b[i]
inc(i)
else:
a[k] <- a[j]
inc(j)
inc(k)
# copy rest of b:
when onlySafeCode:
while k < j:
a[k] = b[i]
inc(k)
inc(i)
else:
if k < j: copyMem(addr(a[k]), addr(b[i]), sizeof(T)*(j-k))
proc sort*[T](a: var openArray[T],
cmp: proc (x, y: T): int = cmp,
order = TSortOrder.Ascending) =
## Default Nimrod sort. The sorting is guaranteed to be stable and
## the worst case is guaranteed to be O(n log n).
## The current implementation uses an iterative
## mergesort to achieve this. It uses a temporary sequence of
## length ``a.len div 2``.
var n = a.len
var b: seq[T]
newSeq(b, n div 2)
var s = 1
while s < n:
var m = n-1-s
while m >= 0:
merge(a, b, max(m-s+1, 0), m, m+s, cmp, order)
dec(m, s*2)
s = s*2

View File

@@ -97,14 +97,15 @@ proc normalize*(s: string): string {.noSideEffect, procvar,
add result, s[i]
proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
rtl, extern: "nsuCmpIgnoreCase".} =
rtl, extern: "nsuCmpIgnoreCase", procvar.} =
## Compares two strings in a case insensitive manner. Returns:
##
## | 0 iff a == b
## | < 0 iff a < b
## | > 0 iff a > b
var i = 0
while i < a.len and i < b.len:
var i = 0
var m = min(a.len, b.len)
while i < m:
result = ord(toLower(a[i])) - ord(toLower(b[i]))
if result != 0: return
inc(i)
@@ -114,7 +115,7 @@ proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
# thus we compile without checks here
proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect,
rtl, extern: "nsuCmpIgnoreStyle".} =
rtl, extern: "nsuCmpIgnoreStyle", procvar.} =
## Compares two strings normalized (i.e. case and
## underscores do not matter). Returns:
##

View File

@@ -1075,7 +1075,7 @@ proc binarySearch(c: irune, tab: openArray[iRune], len, stride: int): int =
return t
return -1
proc toLower*(c: TRune): TRune {.rtl, extern: "nuc$1".} =
proc toLower*(c: TRune): TRune {.rtl, extern: "nuc$1", procvar.} =
## Converts `c` into lower case. This works for any Unicode character.
## If possible, prefer `toLower` over `toUpper`.
var c = irune(c)
@@ -1087,7 +1087,7 @@ proc toLower*(c: TRune): TRune {.rtl, extern: "nuc$1".} =
return TRune(c + toLowerSinglets[p+1] - 500)
return TRune(c)
proc toUpper*(c: TRune): TRune {.rtl, extern: "nuc$1".} =
proc toUpper*(c: TRune): TRune {.rtl, extern: "nuc$1", procvar.} =
## Converts `c` into upper case. This works for any Unicode character.
## If possible, prefer `toLower` over `toUpper`.
var c = irune(c)
@@ -1099,14 +1099,14 @@ proc toUpper*(c: TRune): TRune {.rtl, extern: "nuc$1".} =
return TRune(c + toUpperSinglets[p+1] - 500)
return TRune(c)
proc toTitle*(c: TRune): TRune {.rtl, extern: "nuc$1".} =
proc toTitle*(c: TRune): TRune {.rtl, extern: "nuc$1", procvar.} =
var c = irune(c)
var p = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2)
if p >= 0 and c == toTitleSinglets[p]:
return TRune(c + toTitleSinglets[p+1] - 500)
return TRune(c)
proc isLower*(c: TRune): bool {.rtl, extern: "nuc$1".} =
proc isLower*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} =
## returns true iff `c` is a lower case Unicode character
## If possible, prefer `isLower` over `isUpper`.
var c = irune(c)
@@ -1118,7 +1118,7 @@ proc isLower*(c: TRune): bool {.rtl, extern: "nuc$1".} =
if p >= 0 and c == toUpperSinglets[p]:
return true
proc isUpper*(c: TRune): bool {.rtl, extern: "nuc$1".} =
proc isUpper*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} =
## returns true iff `c` is a upper case Unicode character
## If possible, prefer `isLower` over `isUpper`.
var c = irune(c)
@@ -1130,7 +1130,7 @@ proc isUpper*(c: TRune): bool {.rtl, extern: "nuc$1".} =
if p >= 0 and c == toLowerSinglets[p]:
return true
proc isAlpha*(c: TRune): bool {.rtl, extern: "nuc$1".} =
proc isAlpha*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} =
## returns true iff `c` is an *alpha* Unicode character (i.e. a letter)
if isUpper(c) or isLower(c):
return true
@@ -1142,10 +1142,10 @@ proc isAlpha*(c: TRune): bool {.rtl, extern: "nuc$1".} =
if p >= 0 and c == alphaSinglets[p]:
return true
proc isTitle*(c: TRune): bool {.rtl, extern: "nuc$1".} =
proc isTitle*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} =
return isUpper(c) and isLower(c)
proc isWhiteSpace*(c: TRune): bool {.rtl, extern: "nuc$1".} =
proc isWhiteSpace*(c: TRune): bool {.rtl, extern: "nuc$1", procvar.} =
## returns true iff `c` is a Unicode whitespace character
var c = irune(c)
var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2)
@@ -1161,7 +1161,7 @@ iterator runes*(s: string): TRune =
fastRuneAt(s, i, result, true)
yield result
proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1".} =
proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1", procvar.} =
## compares two UTF8 strings and ignores the case. Returns:
##
## | 0 iff a == b

View File

@@ -623,7 +623,7 @@ template `not_in` * (x, y: expr): expr = not contains(y, x)
proc `is` *[T, S](x: T, y: S): bool {.magic: "Is", noSideEffect.}
template `is_not` *(x, y: expr): expr = not (x is y)
proc cmp*[T, S: typeDesc](x: T, y: S): int =
proc cmp*[T, S: typeDesc](x: T, y: S): int {.procvar.} =
## Generic compare proc. Returns a value < 0 iff x < y, a value > 0 iff x > y
## and 0 iff x == y. This is useful for writing generic algorithms without
## performance loss. This generic implementation uses the `==` and `<`
@@ -632,7 +632,7 @@ proc cmp*[T, S: typeDesc](x: T, y: S): int =
if x < y: return -1
return 1
proc cmp*(x, y: string): int {.noSideEffect.}
proc cmp*(x, y: string): int {.noSideEffect, procvar.}
## Compare proc for strings. More efficient than the generic version.
proc `@` * [IDX, T](a: array[IDX, T]): seq[T] {.
@@ -714,7 +714,8 @@ const
cpuEndian* {.magic: "CpuEndian"}: TEndian = littleEndian
## is the endianness of the target CPU. This is a valuable piece of
## information for low-level code only. This works thanks to compiler magic.
## information for low-level code only. This works thanks to compiler
## magic.
hostOS* {.magic: "HostOS"}: string = ""
## a string that describes the host operating system. Possible values:
@@ -1307,7 +1308,7 @@ proc echo*[Ty](x: openarray[Ty]) {.magic: "Echo".}
## available for the ECMAScript target too!
template newException*(exceptn, message: expr): expr =
## creates an exception object of type "exceptn" and sets its ``msg`` field
## creates an exception object of type ``exceptn`` and sets its ``msg`` field
## to `message`. Returns the new exception object.
block: # open a new scope
var
@@ -1365,7 +1366,7 @@ when not defined(EcmaScript) and not defined(NimrodVM):
include "system/ansi_c"
proc cmp(x, y: string): int =
return int(c_strcmp(x, y))
result = int(c_strcmp(x, y))
const pccHack = if defined(pcc): "_" else: "" # Hack for PCC
when defined(windows):
@@ -1419,8 +1420,9 @@ when not defined(EcmaScript) and not defined(NimrodVM):
## stream* is a good idea, but since it is named ``stderr`` there are few
## programs out there that distinguish properly between ``stdout`` and
## ``stderr``. So, that's what you get if you don't name your variables
## appropriately. It also annoys people if redirection via ``>output.txt``
## does not work because the program writes to ``stderr``.
## appropriately. It also annoys people if redirection
## via ``>output.txt`` does not work because the program writes
## to ``stderr``.
proc Open*(f: var TFile, filename: string,
mode: TFileMode = fmRead, bufSize: int = -1): Bool