Files
Nim/lib/system/sysstr.nim
Parashurama 5d7d8816e5 fixes some issues with underscores in float literals. add more tests.
fixes some potential issues with underscores in float literals.
adds more checks for badly positionned underscores in float literals.
adds more test files.
2016-05-19 21:03:22 +02:00

509 lines
15 KiB
Nim

#
#
# Nim's Runtime Library
# (c) Copyright 2012 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
# string & sequence handling procedures needed by the code generator
# strings are dynamically resized, have a length field
# and are zero-terminated, so they can be casted to C
# strings easily
# we don't use refcounts because that's a behaviour
# the programmer may not want
proc resize(old: int): int {.inline.} =
if old <= 0: result = 4
elif old < 65536: result = old * 2
else: result = old * 3 div 2 # for large arrays * 3/2 is better
proc cmpStrings(a, b: NimString): int {.inline, compilerProc.} =
if a == b: return 0
if a == nil: return -1
if b == nil: return 1
return c_strcmp(a.data, b.data)
proc eqStrings(a, b: NimString): bool {.inline, compilerProc.} =
if a == b: return true
if a == nil or b == nil: return false
return a.len == b.len and
c_memcmp(a.data, b.data, a.len) == 0'i32
when declared(allocAtomic):
template allocStr(size: expr): expr =
cast[NimString](allocAtomic(size))
template allocStrNoInit(size: expr): expr =
cast[NimString](boehmAllocAtomic(size))
else:
template allocStr(size: expr): expr =
cast[NimString](newObj(addr(strDesc), size))
template allocStrNoInit(size: expr): expr =
cast[NimString](newObjNoInit(addr(strDesc), size))
proc rawNewStringNoInit(space: int): NimString {.compilerProc.} =
var s = space
if s < 7: s = 7
result = allocStrNoInit(sizeof(TGenericSeq) + s + 1)
result.reserved = s
when defined(gogc):
result.elemSize = 1
proc rawNewString(space: int): NimString {.compilerProc.} =
var s = space
if s < 7: s = 7
result = allocStr(sizeof(TGenericSeq) + s + 1)
result.reserved = s
when defined(gogc):
result.elemSize = 1
proc mnewString(len: int): NimString {.compilerProc.} =
result = rawNewString(len)
result.len = len
proc copyStrLast(s: NimString, start, last: int): NimString {.compilerProc.} =
var start = max(start, 0)
var len = min(last, s.len-1) - start + 1
if len > 0:
result = rawNewStringNoInit(len)
result.len = len
c_memcpy(result.data, addr(s.data[start]), len)
result.data[len] = '\0'
else:
result = rawNewString(len)
proc copyStr(s: NimString, start: int): NimString {.compilerProc.} =
result = copyStrLast(s, start, s.len-1)
proc toNimStr(str: cstring, len: int): NimString {.compilerProc.} =
result = rawNewStringNoInit(len)
result.len = len
c_memcpy(result.data, str, len + 1)
proc cstrToNimstr(str: cstring): NimString {.compilerRtl.} =
result = toNimStr(str, c_strlen(str))
proc copyString(src: NimString): NimString {.compilerRtl.} =
if src != nil:
if (src.reserved and seqShallowFlag) != 0:
result = src
else:
result = rawNewStringNoInit(src.len)
result.len = src.len
c_memcpy(result.data, src.data, src.len + 1)
proc copyStringRC1(src: NimString): NimString {.compilerRtl.} =
if src != nil:
when declared(newObjRC1):
var s = src.len
if s < 7: s = 7
result = cast[NimString](newObjRC1(addr(strDesc), sizeof(TGenericSeq) +
s+1))
result.reserved = s
else:
result = rawNewStringNoInit(src.len)
result.len = src.len
c_memcpy(result.data, src.data, src.len + 1)
proc hashString(s: string): int {.compilerproc.} =
# the compiler needs exactly the same hash function!
# this used to be used for efficient generation of string case statements
var h = 0
for i in 0..len(s)-1:
h = h +% ord(s[i])
h = h +% h shl 10
h = h xor (h shr 6)
h = h +% h shl 3
h = h xor (h shr 11)
h = h +% h shl 15
result = h
proc addChar(s: NimString, c: char): NimString =
# is compilerproc!
result = s
if result.len >= result.space:
result.reserved = resize(result.space)
result = cast[NimString](growObj(result,
sizeof(TGenericSeq) + result.reserved + 1))
result.data[result.len] = c
result.data[result.len+1] = '\0'
inc(result.len)
# These routines should be used like following:
# <Nim code>
# s &= "Hello " & name & ", how do you feel?"
#
# <generated C code>
# {
# s = resizeString(s, 6 + name->len + 17);
# appendString(s, strLit1);
# appendString(s, strLit2);
# appendString(s, strLit3);
# }
#
# <Nim code>
# s = "Hello " & name & ", how do you feel?"
#
# <generated C code>
# {
# string tmp0;
# tmp0 = rawNewString(6 + name->len + 17);
# appendString(s, strLit1);
# appendString(s, strLit2);
# appendString(s, strLit3);
# s = tmp0;
# }
#
# <Nim code>
# s = ""
#
# <generated C code>
# s = rawNewString(0);
proc resizeString(dest: NimString, addlen: int): NimString {.compilerRtl.} =
if dest.len + addlen <= dest.space:
result = dest
else: # slow path:
var sp = max(resize(dest.space), dest.len + addlen)
result = cast[NimString](growObj(dest, sizeof(TGenericSeq) + sp + 1))
result.reserved = sp
#result = rawNewString(sp)
#copyMem(result, dest, dest.len + sizeof(TGenericSeq))
# DO NOT UPDATE LEN YET: dest.len = newLen
proc appendString(dest, src: NimString) {.compilerproc, inline.} =
c_memcpy(addr(dest.data[dest.len]), src.data, src.len + 1)
inc(dest.len, src.len)
proc appendChar(dest: NimString, c: char) {.compilerproc, inline.} =
dest.data[dest.len] = c
dest.data[dest.len+1] = '\0'
inc(dest.len)
proc setLengthStr(s: NimString, newLen: int): NimString {.compilerRtl.} =
var n = max(newLen, 0)
if n <= s.space:
result = s
else:
result = resizeString(s, n)
result.len = n
result.data[n] = '\0'
# ----------------- sequences ----------------------------------------------
proc incrSeq(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
# increments the length by one:
# this is needed for supporting ``add``;
#
# add(seq, x) generates:
# seq = incrSeq(seq, sizeof(x));
# seq[seq->len-1] = x;
result = seq
if result.len >= result.space:
result.reserved = resize(result.space)
result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
GenericSeqSize))
inc(result.len)
proc incrSeqV2(seq: PGenericSeq, elemSize: int): PGenericSeq {.compilerProc.} =
# incrSeq version 2
result = seq
if result.len >= result.space:
result.reserved = resize(result.space)
result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
GenericSeqSize))
proc setLengthSeq(seq: PGenericSeq, elemSize, newLen: int): PGenericSeq {.
compilerRtl.} =
result = seq
if result.space < newLen:
result.reserved = max(resize(result.space), newLen)
result = cast[PGenericSeq](growObj(result, elemSize * result.reserved +
GenericSeqSize))
elif newLen < result.len:
# we need to decref here, otherwise the GC leaks!
when not defined(boehmGC) and not defined(nogc) and
not defined(gcMarkAndSweep) and not defined(gogc) and
not defined(gcStack):
when false: # compileOption("gc", "v2"):
for i in newLen..result.len-1:
let len0 = gch.tempStack.len
forAllChildrenAux(cast[pointer](cast[ByteAddress](result) +%
GenericSeqSize +% (i*%elemSize)),
extGetCellType(result).base, waPush)
let len1 = gch.tempStack.len
for i in len0 .. <len1:
doDecRef(gch.tempStack.d[i], LocalHeap, MaybeCyclic)
gch.tempStack.len = len0
else:
for i in newLen..result.len-1:
forAllChildrenAux(cast[pointer](cast[ByteAddress](result) +%
GenericSeqSize +% (i*%elemSize)),
extGetCellType(result).base, waZctDecRef)
# XXX: zeroing out the memory can still result in crashes if a wiped-out
# cell is aliased by another pointer (ie proc parameter or a let variable).
# This is a tought problem, because even if we don't zeroMem here, in the
# presence of user defined destructors, the user will expect the cell to be
# "destroyed" thus creating the same problem. We can destoy the cell in the
# finalizer of the sequence, but this makes destruction non-deterministic.
zeroMem(cast[pointer](cast[ByteAddress](result) +% GenericSeqSize +%
(newLen*%elemSize)), (result.len-%newLen) *% elemSize)
result.len = newLen
# --------------- other string routines ----------------------------------
proc nimIntToStr(x: int): string {.compilerRtl.} =
result = newString(sizeof(x)*4)
var i = 0
var y = x
while true:
var d = y div 10
result[i] = chr(abs(int(y - d*10)) + ord('0'))
inc(i)
y = d
if y == 0: break
if x < 0:
result[i] = '-'
inc(i)
setLen(result, i)
# mirror the string:
for j in 0..i div 2 - 1:
swap(result[j], result[i-j-1])
proc nimFloatToStr(f: float): string {.compilerproc.} =
var buf: array[0..64, char]
var n: int = c_sprintf(buf, "%.16g", f)
var hasDot = false
for i in 0..n-1:
if buf[i] == ',':
buf[i] = '.'
hasDot = true
elif buf[i] in {'a'..'z', 'A'..'Z', '.'}:
hasDot = true
if not hasDot:
buf[n] = '.'
buf[n+1] = '0'
buf[n+2] = '\0'
# On Windows nice numbers like '1.#INF', '-1.#INF' or '1.#NAN' are produced.
# We want to get rid of these here:
if buf[n-1] == 'N':
result = "nan"
elif buf[n-1] == 'F':
if buf[0] == '-':
result = "-inf"
else:
result = "inf"
else:
result = $buf
proc strtod(buf: cstring, endptr: ptr cstring): float64 {.importc,
header: "<stdlib.h>", noSideEffect.}
const
IdentChars = {'a'..'z', 'A'..'Z', '0'..'9', '_'}
powtens = [ 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
1e20, 1e21, 1e22]
proc nimParseBiggestFloat(s: string, number: var BiggestFloat,
start = 0): int {.compilerProc.} =
# This routine attempt to parse float that can parsed quickly.
# ie whose integer part can fit inside a 53bits integer.
# their real exponent must also be <= 22. If the float doesn't follow
# these restrictions, transform the float into this form:
# INTEGER * 10 ^ exponent and leave the work to standard `strtod()`.
# This avoid the problems of decimal character portability.
# see: http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
var
i = start
sign = 1.0
kdigits, fdigits = 0
exponent: int
integer: uint64
fraction: uint64
frac_exponent= 0
exp_sign = 1
first_digit = -1
has_sign = false
# Sign?
if s[i] == '+' or s[i] == '-':
has_sign = true
if s[i] == '-':
sign = -1.0
inc(i)
# NaN?
if s[i] == 'N' or s[i] == 'n':
if s[i+1] == 'A' or s[i+1] == 'a':
if s[i+2] == 'N' or s[i+2] == 'n':
if s[i+3] notin IdentChars:
number = NaN
return i+3 - start
return 0
# Inf?
if s[i] == 'I' or s[i] == 'i':
if s[i+1] == 'N' or s[i+1] == 'n':
if s[i+2] == 'F' or s[i+2] == 'f':
if s[i+3] notin IdentChars:
number = Inf*sign
return i+3 - start
return 0
# Skip leading zero
while s[i] == '0':
inc(i)
while s[i] == '_': inc(i)
if s[i] in {'0'..'9'}:
first_digit = (s[i].ord - '0'.ord)
# Integer part?
while s[i] in {'0'..'9'}:
inc(kdigits)
integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
inc(i)
while s[i] == '_': inc(i)
# Fractional part?
if s[i] == '.':
inc(i)
# if no integer part, Skip leading zeros
if kdigits <= 0:
while s[i] == '0':
inc(frac_exponent)
inc(i)
while s[i] == '_': inc(i)
if first_digit == -1 and s[i] in {'0'..'9'}:
first_digit = (s[i].ord - '0'.ord)
# get fractional part
while s[i] in {'0'..'9'}:
inc(fdigits)
inc(frac_exponent)
integer = integer * 10'u64 + (s[i].ord - '0'.ord).uint64
inc(i)
while s[i] == '_': inc(i)
# if has no digits: return error
if kdigits + fdigits <= 0 and
(i == start or # was only zero
has_sign) : # or only '+' or '-
return 0
if s[i] in {'e', 'E'}:
inc(i)
if s[i] == '+' or s[i] == '-':
if s[i] == '-':
exp_sign = -1
inc(i)
if s[i] notin {'0'..'9'}:
return 0
while s[i] in {'0'..'9'}:
exponent = exponent * 10 + (ord(s[i]) - ord('0'))
inc(i)
while s[i] == '_': inc(i) # underscores are allowed and ignored
var real_exponent = exp_sign*exponent - frac_exponent
let exp_negative = real_exponent < 0
var abs_exponent = abs(real_exponent)
# if exponent greater than can be represented: +/- zero or infinity
if abs_exponent > 999:
if exp_negative:
number = 0.0*sign
else:
number = Inf*sign
return i - start
# if integer is representable in 53 bits: fast path
# max fast path integer is 1<<53 - 1 or 8999999999999999 (16 digits)
if kdigits + fdigits <= 16 and first_digit <= 8:
# max float power of ten with set bits above the 53th bit is 10^22
if abs_exponent <= 22:
if exp_negative:
number = sign * integer.float / powtens[abs_exponent]
else:
number = sign * integer.float * powtens[abs_exponent]
return i - start
# if exponent is greater try to fit extra exponent above 22 by multiplying
# integer part is there is space left.
let slop = 15 - kdigits - fdigits
if abs_exponent <= 22 + slop and not exp_negative:
number = sign * integer.float * powtens[slop] * powtens[abs_exponent-slop]
return i - start
# if failed: slow path with strtod.
var t: array[500, char] # flaviu says: 325 is the longest reasonable literal
var ti = 0
let maxlen = t.high - "e+000".len # reserve enough space for exponent
result = i - start
i = start
# re-parse without error checking, any error should be handled by the code above.
while s[i] in {'0'..'9','+','-'}:
if ti < maxlen:
t[ti] = s[i]; inc(ti)
inc(i)
while s[i] in {'.', '_'}: # skip underscore and decimal point
inc(i)
# insert exponent
t[ti] = 'E'; inc(ti)
t[ti] = if exp_negative: '-' else: '+'; inc(ti)
inc(ti, 3)
# insert adjusted exponent
t[ti-1] = ('0'.ord + abs_exponent mod 10).char; abs_exponent = abs_exponent div 10
t[ti-2] = ('0'.ord + abs_exponent mod 10).char; abs_exponent = abs_exponent div 10
t[ti-3] = ('0'.ord + abs_exponent mod 10).char
number = strtod(t, nil)
proc nimInt64ToStr(x: int64): string {.compilerRtl.} =
result = newString(sizeof(x)*4)
var i = 0
var y = x
while true:
var d = y div 10
result[i] = chr(abs(int(y - d*10)) + ord('0'))
inc(i)
y = d
if y == 0: break
if x < 0:
result[i] = '-'
inc(i)
setLen(result, i)
# mirror the string:
for j in 0..i div 2 - 1:
swap(result[j], result[i-j-1])
proc nimBoolToStr(x: bool): string {.compilerRtl.} =
return if x: "true" else: "false"
proc nimCharToStr(x: char): string {.compilerRtl.} =
result = newString(1)
result[0] = x
proc binaryStrSearch(x: openArray[string], y: string): int {.compilerproc.} =
var
a = 0
b = len(x)
while a < b:
var mid = (a + b) div 2
if x[mid] < y:
a = mid + 1
else:
b = mid
if a < len(x) and x[a] == y:
result = a
else:
result = -1