Use -d:builtinOverflow for builtin overflow checks of recent Clang and GCC >= 5.0.

Making this optional is probably the better choice after all: with builtin
overflow checks, the code GCC generates actually seems to be a bit slower,
while Clang's is 3 times faster.
This commit is contained in:
def
2015-05-08 03:58:44 +02:00
parent 0e8e574961
commit fef21e9003

View File

@@ -8,75 +8,6 @@
#
# Only clang has __has_builtin (so far)
#
# The C code emitted below gives compilers that lack `__has_builtin` a
# fallback macro that always evaluates to 0, so the macro can be used in
# preprocessor conditions regardless of the compiler.
#
# TODO: This is emitted at the wrong position so we don't actually have an
# emit. Could we add this to nimbase.h instead?
{.emit: """#ifndef __has_builtin
#define __has_builtin(x) 0
#endif""".}
# Builtin compiler functions for improved performance
proc checkFunction(name: string): string =
  ## Builds the C preprocessor condition used to decide whether
  ## `__builtin_<name>_overflow` may be called: it holds when
  ## `__has_builtin` reports the builtin or when `__GNUC__` is at least 5.
  result = "((__has_builtin(__builtin_"
  result.add name
  result.add "_overflow)) || __GNUC__ >= 5)"
# TODO: This is totally ugly. But we can't reliably detect this from Nim,
# especially with cross-compiling where the user may be using an older compiler
# version. Switching this on/off manually with a define seems weird as well.
# Bind the 64-bit signed overflow builtins to whichever C type is 8 bytes
# wide: the `l` variants when `clong` is, otherwise the `ll` variants when
# `clonglong` is. Nothing is declared if neither branch matches.
# Each `has*Overflow` constant holds the C preprocessor condition string
# produced by `checkFunction`; the arithmetic procs in this file splice it
# into an emitted `#if`.
# Each `*Overflow` proc takes the operands `a` and `b` plus an output
# argument `c`; callers in this file raise an overflow error when the proc
# returns true.
when sizeof(clong) == 8:
const hasAddInt64Overflow = checkFunction("saddl")
proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_saddl_overflow", nodecl, nosideeffect.}
const hasSubInt64Overflow = checkFunction("ssubl")
proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_ssubl_overflow", nodecl, nosideeffect.}
const hasMulInt64Overflow = checkFunction("smull")
proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_smull_overflow", nodecl, nosideeffect.}
elif sizeof(clonglong) == 8:
const hasAddInt64Overflow = checkFunction("saddll")
proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_saddll_overflow", nodecl, nosideeffect.}
const hasSubInt64Overflow = checkFunction("ssubll")
proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_ssubll_overflow", nodecl, nosideeffect.}
const hasMulInt64Overflow = checkFunction("smulll")
proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_smulll_overflow", nodecl, nosideeffect.}
# `int`-sized counterparts of the 64-bit bindings. When `int` is 8 bytes they
# reuse the 64-bit condition constants and forward to the 64-bit procs; when
# `int` and `cint` are both 4 bytes they bind the plain
# `__builtin_s*_overflow` functions instead. Nothing is declared if neither
# condition holds.
when sizeof(int) == 8:
const hasAddIntOverflow = hasAddInt64Overflow
proc addIntOverflow(a, b: int, c: var int): bool {.inline.} =
addInt64Overflow(a, b, c)
const hasSubIntOverflow = hasSubInt64Overflow
proc subIntOverflow(a, b: int, c: var int): bool {.inline.} =
subInt64Overflow(a, b, c)
const hasMulIntOverflow = hasMulInt64Overflow
proc mulIntOverflow(a, b: int, c: var int): bool {.inline.} =
mulInt64Overflow(a, b, c)
elif sizeof(int) == 4 and sizeof(cint) == 4:
const hasAddIntOverflow = checkFunction("sadd")
proc addIntOverflow(a, b: int, c: var int): bool {.
importc: "__builtin_sadd_overflow", nodecl, nosideeffect.}
const hasSubIntOverflow = checkFunction("ssub")
proc subIntOverflow(a, b: int, c: var int): bool {.
importc: "__builtin_ssub_overflow", nodecl, nosideeffect.}
const hasMulIntOverflow = checkFunction("smul")
proc mulIntOverflow(a, b: int, c: var int): bool {.
importc: "__builtin_smul_overflow", nodecl, nosideeffect.}
# simple integer arithmetic with overflow checking
proc raiseOverflow {.compilerproc, noinline, noreturn.} =
@@ -86,27 +17,114 @@ proc raiseOverflow {.compilerproc, noinline, noreturn.} =
# Reports a DivByZeroError ("division by zero") through sysFatal. Declared
# noreturn and noinline.
proc raiseDivByZero {.compilerproc, noinline, noreturn.} =
sysFatal(DivByZeroError, "division by zero")
# Checked 64-bit addition. The emitted `#if`/`#else`/`#endif` lines wrap the
# two alternative bodies in the generated C, so the C preprocessor selects one
# of them based on the condition string stored in `hasAddInt64Overflow`.
proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
{.emit: "#if `hasAddInt64Overflow`".}
# Builtin path: `result` is passed as the output argument and a true return
# is treated as overflow.
if addInt64Overflow(a, b, result):
raiseOverflow()
{.emit: "#else".}
# Fallback path: add with `+%`, then accept the sum when its sign bit matches
# that of `a` or of `b` (the xor is non-negative exactly then).
result = a +% b
if (result xor a) >= int64(0) or (result xor b) >= int64(0):
return result
raiseOverflow()
{.emit: "#endif".}
when defined(builtinOverflow):
# Builtin compiler functions for improved performance
when sizeof(clong) == 8:
proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_saddl_overflow", nodecl, nosideeffect.}
proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
{.emit: "#if `hasSubInt64Overflow`".}
if subInt64Overflow(a, b, result):
proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_ssubl_overflow", nodecl, nosideeffect.}
proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_smull_overflow", nodecl, nosideeffect.}
elif sizeof(clonglong) == 8:
proc addInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_saddll_overflow", nodecl, nosideeffect.}
proc subInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_ssubll_overflow", nodecl, nosideeffect.}
proc mulInt64Overflow[T: int64|int](a, b: T, c: var T): bool {.
importc: "__builtin_smulll_overflow", nodecl, nosideeffect.}
# `int`-sized overflow helpers. When `int` is 8 bytes they forward to the
# 64-bit procs; when `int` and `cint` are both 4 bytes they bind the plain
# `__builtin_s*_overflow` functions directly. Nothing is declared if neither
# condition holds.
when sizeof(int) == 8:
proc addIntOverflow(a, b: int, c: var int): bool {.inline.} =
addInt64Overflow(a, b, c)
proc subIntOverflow(a, b: int, c: var int): bool {.inline.} =
subInt64Overflow(a, b, c)
proc mulIntOverflow(a, b: int, c: var int): bool {.inline.} =
mulInt64Overflow(a, b, c)
elif sizeof(int) == 4 and sizeof(cint) == 4:
proc addIntOverflow(a, b: int, c: var int): bool {.
importc: "__builtin_sadd_overflow", nodecl, nosideeffect.}
proc subIntOverflow(a, b: int, c: var int): bool {.
importc: "__builtin_ssub_overflow", nodecl, nosideeffect.}
proc mulIntOverflow(a, b: int, c: var int): bool {.
importc: "__builtin_smul_overflow", nodecl, nosideeffect.}
# Checked 64-bit addition via the compiler builtin: `result` is passed as the
# output argument, and raiseOverflow is called when the builtin returns true.
proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
if addInt64Overflow(a, b, result):
raiseOverflow()
# Checked 64-bit subtraction via the compiler builtin: `result` is passed as
# the output argument, and raiseOverflow is called when the builtin returns
# true.
proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
if subInt64Overflow(a, b, result):
raiseOverflow()
# Checked 64-bit multiplication via the compiler builtin: `result` is passed
# as the output argument, and raiseOverflow is called when the builtin returns
# true.
proc mulInt64(a, b: int64): int64 {.compilerproc, inline.} =
if mulInt64Overflow(a, b, result):
raiseOverflow()
else:
# Portable checked 64-bit addition. The sum is computed with `+%` and returned
# when its sign bit matches that of `a` or of `b` (the xor is non-negative
# exactly then); otherwise raiseOverflow is called.
proc addInt64(a, b: int64): int64 {.compilerProc, inline.} =
result = a +% b
if (result xor a) >= int64(0) or (result xor b) >= int64(0):
return result
raiseOverflow()
# Portable checked 64-bit subtraction. The difference is computed with `-%`
# and returned when its sign bit matches that of `a` or of `not b` (the xor is
# non-negative exactly then); otherwise raiseOverflow is called.
proc subInt64(a, b: int64): int64 {.compilerProc, inline.} =
result = a -% b
if (result xor a) >= int64(0) or (result xor not b) >= int64(0):
return result
raiseOverflow()
#
# This code has been inspired by Python's source code.
# The native int product x*y is either exactly right or *way* off, being
# just the last n bits of the true product, where n is the number of bits
# in an int (the delivered product is the true product plus i*2**n for
# some integer i).
#
# The native float64 product x*y is subject to three
# rounding errors: on a sizeof(int)==8 box, each cast to double can lose
# info, and even on a sizeof(int)==4 box, the multiplication can lose info.
# But, unlike the native int product, it's not in *range* trouble: even
# if sizeof(int)==32 (256-bit ints), the product easily fits in the
# dynamic range of a float64. So the leading 50 (or so) bits of the float64
# product are correct.
#
# We check these two ways against each other, and declare victory if they're
# approximately the same. Else, because the native int product is the only
# one that can lose catastrophic amounts of information, it's the native int
# product that must have overflowed.
#
# Portable checked 64-bit multiplication: computes the product once with `*%`
# and once in float64, returns the integer product when the two agree exactly
# or to within 1/32 of the float product, and calls raiseOverflow otherwise.
proc mulInt64(a, b: int64): int64 {.compilerproc.} =
var
resAsFloat, floatProd: float64
result = a *% b
floatProd = toBiggestFloat(a) # conversion
floatProd = floatProd * toBiggestFloat(b)
resAsFloat = toBiggestFloat(result)
# Fast path for normal case: small multiplicands, and no info
# is lost in either method.
if resAsFloat == floatProd: return result
# Somebody somewhere lost info. Close enough, or way off? Note
# that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
# The difference either is or isn't significant compared to the
# true value (of which floatProd is a good approximation).
# abs(diff)/abs(prod) <= 1/32 iff
# 32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
return result
raiseOverflow()
{.emit: "#else".}
result = a -% b
if (result xor a) >= int64(0) or (result xor not b) >= int64(0):
return result
raiseOverflow()
{.emit: "#endif".}
proc negInt64(a: int64): int64 {.compilerProc, inline.} =
if a != low(int64): return -a
@@ -130,55 +148,6 @@ proc modInt64(a, b: int64): int64 {.compilerProc, inline.} =
raiseDivByZero()
return a mod b
#
# This code has been inspired by Python's source code.
# The native int product x*y is either exactly right or *way* off, being
# just the last n bits of the true product, where n is the number of bits
# in an int (the delivered product is the true product plus i*2**n for
# some integer i).
#
# The native float64 product x*y is subject to three
# rounding errors: on a sizeof(int)==8 box, each cast to double can lose
# info, and even on a sizeof(int)==4 box, the multiplication can lose info.
# But, unlike the native int product, it's not in *range* trouble: even
# if sizeof(int)==32 (256-bit ints), the product easily fits in the
# dynamic range of a float64. So the leading 50 (or so) bits of the float64
# product are correct.
#
# We check these two ways against each other, and declare victory if they're
# approximately the same. Else, because the native int product is the only
# one that can lose catastrophic amounts of information, it's the native int
# product that must have overflowed.
#
# Checked 64-bit multiplication. The emitted `#if`/`#else`/`#endif` lines wrap
# the two alternative bodies in the generated C, so the C preprocessor selects
# one of them based on the condition string stored in `hasMulInt64Overflow`.
proc mulInt64(a, b: int64): int64 {.compilerproc.} =
{.emit: "#if `hasMulInt64Overflow`".}
# Builtin path: `result` is passed as the output argument and a true return
# is treated as overflow.
if mulInt64Overflow(a, b, result):
raiseOverflow()
{.emit: "#else".}
# Fallback path: compare the `*%` product against the float64 product.
var
resAsFloat, floatProd: float64
result = a *% b
floatProd = toBiggestFloat(a) # conversion
floatProd = floatProd * toBiggestFloat(b)
resAsFloat = toBiggestFloat(result)
# Fast path for normal case: small multiplicands, and no info
# is lost in either method.
if resAsFloat == floatProd: return result
# Somebody somewhere lost info. Close enough, or way off? Note
# that a != 0 and b != 0 (else resAsFloat == floatProd == 0).
# The difference either is or isn't significant compared to the
# true value (of which floatProd is a good approximation).
# abs(diff)/abs(prod) <= 1/32 iff
# 32 * abs(diff) <= abs(prod) -- 5 good bits is "close enough"
if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
return result
raiseOverflow()
{.emit: "#endif".}
proc absInt(a: int): int {.compilerProc, inline.} =
if a != low(int):
if a >= 0: return a
@@ -330,30 +299,35 @@ elif false: # asmVersion and (defined(gcc) or defined(llvm_gcc)):
:"%edx"
"""
# Builtin-based checked `int` addition. Only defined when no `addInt` has been
# declared yet and the `builtinOverflow` symbol is defined at compile time.
when not declared(addInt) and defined(builtinOverflow):
proc addInt(a, b: int): int {.compilerProc, inline.} =
if addIntOverflow(a, b, result):
raiseOverflow()
# Builtin-based checked `int` subtraction. Only defined when no `subInt` has
# been declared yet and the `builtinOverflow` symbol is defined at compile
# time.
when not declared(subInt) and defined(builtinOverflow):
proc subInt(a, b: int): int {.compilerProc, inline.} =
if subIntOverflow(a, b, result):
raiseOverflow()
# Builtin-based checked `int` multiplication. Only defined when no `mulInt`
# has been declared yet and the `builtinOverflow` symbol is defined at compile
# time.
when not declared(mulInt) and defined(builtinOverflow):
proc mulInt(a, b: int): int {.compilerProc, inline.} =
if mulIntOverflow(a, b, result):
raiseOverflow()
# Platform independent versions of the above (slower!)
# Checked `int` addition, defined only when no `addInt` has been declared yet.
# The emitted `#if`/`#else`/`#endif` lines wrap the two alternative bodies in
# the generated C, so the C preprocessor selects one of them based on the
# condition string stored in `hasAddIntOverflow`.
when not declared(addInt):
proc addInt(a, b: int): int {.compilerProc, inline.} =
{.emit: "#if `hasAddIntOverflow`".}
# Builtin path: a true return is treated as overflow.
if addIntOverflow(a, b, result):
raiseOverflow()
{.emit: "#else".}
# Fallback path: the `+%` sum is accepted when its sign bit matches that of
# `a` or of `b`.
result = a +% b
if (result xor a) >= 0 or (result xor b) >= 0:
return result
raiseOverflow()
{.emit: "#endif".}
# Checked `int` subtraction, defined only when no `subInt` has been declared
# yet. The emitted `#if`/`#else`/`#endif` lines wrap the two alternative
# bodies in the generated C, so the C preprocessor selects one of them based
# on the condition string stored in `hasSubIntOverflow`.
when not declared(subInt):
proc subInt(a, b: int): int {.compilerProc, inline.} =
{.emit: "#if `hasSubIntOverflow`".}
# Builtin path: a true return is treated as overflow.
if subIntOverflow(a, b, result):
raiseOverflow()
{.emit: "#else".}
# Fallback path: the `-%` difference is accepted when its sign bit matches
# that of `a` or of `not b`.
result = a -% b
if (result xor a) >= 0 or (result xor not b) >= 0:
return result
raiseOverflow()
{.emit: "#endif".}
when not declared(negInt):
proc negInt(a: int): int {.compilerProc, inline.} =
@@ -396,10 +370,6 @@ when not declared(mulInt):
# native int product that must have overflowed.
#
proc mulInt(a, b: int): int {.compilerProc.} =
{.emit: "#if `hasMulIntOverflow`".}
if mulIntOverflow(a, b, result):
raiseOverflow()
{.emit: "#else".}
var
resAsFloat, floatProd: float
@@ -421,7 +391,6 @@ when not declared(mulInt):
if 32.0 * abs(resAsFloat - floatProd) <= abs(floatProd):
return result
raiseOverflow()
{.emit: "#endif".}
# We avoid setting the FPU control word here for compatibility with libraries
# written in other languages.