mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-28 17:04:41 +00:00
* Update lib/pure/bitops.nim * Update lib/system/sets.nim * Apply suggestions from code review Co-authored-by: Andreas Rumpf <rumpf_a@web.de>
This commit is contained in:
@@ -27,6 +27,9 @@
|
||||
|
||||
import macros
|
||||
import std/private/since
|
||||
from std/private/vmutils import forwardImpl, toUnsigned
|
||||
|
||||
|
||||
|
||||
func bitnot*[T: SomeInteger](x: T): T {.magic: "BitnotI".}
|
||||
## Computes the `bitwise complement` of the integer `x`.
|
||||
@@ -58,34 +61,6 @@ macro bitxor*[T: SomeInteger](x, y: T; z: varargs[T]): T =
|
||||
for extra in z:
|
||||
result = newCall(fn, result, extra)
|
||||
|
||||
const useBuiltins = not defined(noIntrinsicsBitOpts)
|
||||
const noUndefined = defined(noUndefinedBitOpts)
|
||||
const useGCC_builtins = (defined(gcc) or defined(llvm_gcc) or
|
||||
defined(clang)) and useBuiltins
|
||||
const useICC_builtins = defined(icc) and useBuiltins
|
||||
const useVCC_builtins = defined(vcc) and useBuiltins
|
||||
const arch64 = sizeof(int) == 8
|
||||
const useBuiltinsRotate = (defined(amd64) or defined(i386)) and
|
||||
(defined(gcc) or defined(clang) or defined(vcc) or
|
||||
(defined(icl) and not defined(cpp))) and useBuiltins
|
||||
|
||||
template toUnsigned(x: int8): uint8 = cast[uint8](x)
|
||||
template toUnsigned(x: int16): uint16 = cast[uint16](x)
|
||||
template toUnsigned(x: int32): uint32 = cast[uint32](x)
|
||||
template toUnsigned(x: int64): uint64 = cast[uint64](x)
|
||||
template toUnsigned(x: int): uint = cast[uint](x)
|
||||
|
||||
template forwardImpl(impl, arg) {.dirty.} =
|
||||
when sizeof(x) <= 4:
|
||||
when x is SomeSignedInt:
|
||||
impl(cast[uint32](x.int32))
|
||||
else:
|
||||
impl(x.uint32)
|
||||
else:
|
||||
when x is SomeSignedInt:
|
||||
impl(cast[uint64](x.int64))
|
||||
else:
|
||||
impl(x.uint64)
|
||||
|
||||
type BitsRange*[T] = range[0..sizeof(T)*8-1]
|
||||
## A range with all bit positions for type `T`.
|
||||
@@ -436,13 +411,12 @@ func fastlog2Nim(x: uint64): int {.inline.} =
|
||||
v = v or v shr 32
|
||||
result = lookup[(v * 0x03F6EAF2CD271461'u64) shr 58].int
|
||||
|
||||
# sets.nim cannot import bitops, but bitops can use include
|
||||
# system/sets to eliminate code duplication. sets.nim defines
|
||||
# countBits32 and countBits64.
|
||||
import system/countbits_impl
|
||||
|
||||
template countSetBitsNim(n: uint32): int = countBits32(n)
|
||||
template countSetBitsNim(n: uint64): int = countBits64(n)
|
||||
const arch64 = sizeof(int) == 8
|
||||
const useBuiltinsRotate = (defined(amd64) or defined(i386)) and
|
||||
(defined(gcc) or defined(clang) or defined(vcc) or
|
||||
(defined(icl) and not defined(cpp))) and useBuiltins
|
||||
|
||||
template parityImpl[T](value: T): int =
|
||||
# formula id from: https://graphics.stanford.edu/%7Eseander/bithacks.html#ParityParallel
|
||||
@@ -459,11 +433,6 @@ template parityImpl[T](value: T): int =
|
||||
|
||||
|
||||
when useGCC_builtins:
|
||||
# Returns the number of set 1-bits in value.
|
||||
proc builtin_popcount(x: cuint): cint {.importc: "__builtin_popcount", cdecl.}
|
||||
proc builtin_popcountll(x: culonglong): cint {.
|
||||
importc: "__builtin_popcountll", cdecl.}
|
||||
|
||||
# Returns the bit parity in value
|
||||
proc builtin_parity(x: cuint): cint {.importc: "__builtin_parity", cdecl.}
|
||||
proc builtin_parityll(x: culonglong): cint {.importc: "__builtin_parityll", cdecl.}
|
||||
@@ -481,14 +450,6 @@ when useGCC_builtins:
|
||||
proc builtin_ctzll(x: culonglong): cint {.importc: "__builtin_ctzll", cdecl.}
|
||||
|
||||
elif useVCC_builtins:
|
||||
# Counts the number of one bits (population count) in a 16-, 32-, or 64-bit unsigned integer.
|
||||
func builtin_popcnt16(a2: uint16): uint16 {.
|
||||
importc: "__popcnt16", header: "<intrin.h>".}
|
||||
func builtin_popcnt32(a2: uint32): uint32 {.
|
||||
importc: "__popcnt", header: "<intrin.h>".}
|
||||
func builtin_popcnt64(a2: uint64): uint64 {.
|
||||
importc: "__popcnt64", header: "<intrin.h>".}
|
||||
|
||||
# Search the mask data from most significant bit (MSB) to least significant bit (LSB) for a set bit (1).
|
||||
func bitScanReverse(index: ptr culong, mask: culong): cuchar {.
|
||||
importc: "_BitScanReverse", header: "<intrin.h>".}
|
||||
@@ -507,15 +468,6 @@ elif useVCC_builtins:
|
||||
index.int
|
||||
|
||||
elif useICC_builtins:
|
||||
|
||||
# Intel compiler intrinsics: http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm
|
||||
# see also: https://software.intel.com/en-us/node/523362
|
||||
# Count the number of bits set to 1 in an integer a, and return that count in dst.
|
||||
func builtin_popcnt32(a: cint): cint {.
|
||||
importc: "_popcnt", header: "<immintrin.h>".}
|
||||
func builtin_popcnt64(a: uint64): cint {.
|
||||
importc: "_popcnt64", header: "<immintrin.h>".}
|
||||
|
||||
# Returns the number of trailing 0-bits in x, starting at the least significant bit position. If x is 0, the result is undefined.
|
||||
func bitScanForward(p: ptr uint32, b: uint32): cuchar {.
|
||||
importc: "_BitScanForward", header: "<immintrin.h>".}
|
||||
@@ -533,37 +485,13 @@ elif useICC_builtins:
|
||||
discard fnc(index.addr, v)
|
||||
index.int
|
||||
|
||||
|
||||
func countSetBits*(x: SomeInteger): int {.inline.} =
|
||||
## Counts the set bits in an integer (also called `Hamming weight`:idx:).
|
||||
runnableExamples:
|
||||
doAssert countSetBits(0b0000_0011'u8) == 2
|
||||
doAssert countSetBits(0b1010_1010'u8) == 4
|
||||
|
||||
# TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
|
||||
# like GCC and MSVC
|
||||
when x is SomeSignedInt:
|
||||
let x = x.toUnsigned
|
||||
when nimvm:
|
||||
result = forwardImpl(countSetBitsNim, x)
|
||||
else:
|
||||
when useGCC_builtins:
|
||||
when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int
|
||||
else: result = builtin_popcountll(x.culonglong).int
|
||||
elif useVCC_builtins:
|
||||
when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int
|
||||
elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int
|
||||
elif arch64: result = builtin_popcnt64(x.uint64).int
|
||||
else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int +
|
||||
builtin_popcnt32((x.uint64 shr 32'u64).uint32).int
|
||||
elif useICC_builtins:
|
||||
when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int
|
||||
elif arch64: result = builtin_popcnt64(x.uint64).int
|
||||
else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int +
|
||||
builtin_popcnt32((x.uint64 shr 32'u64).cint).int
|
||||
else:
|
||||
when sizeof(x) <= 4: result = countSetBitsNim(x.uint32)
|
||||
else: result = countSetBitsNim(x.uint64)
|
||||
result = countSetBitsImpl(x)
|
||||
|
||||
func popcount*(x: SomeInteger): int {.inline.} =
|
||||
## Alias for `countSetBits <#countSetBits,SomeInteger>`_ (Hamming weight).
|
||||
|
||||
17
lib/std/private/vmutils.nim
Normal file
17
lib/std/private/vmutils.nim
Normal file
@@ -0,0 +1,17 @@
|
||||
template forwardImpl*(impl, arg) {.dirty.} =
  ## Widens the integer `arg` to `uint32` (operands of at most 4 bytes) or to
  ## `uint64` (anything larger) and expands to `impl(widenedArg)`.
  ##
  ## Signed operands are first converted to the signed type of the target
  ## width and then bit-cast, so the two's-complement pattern is carried over
  ## (sign-extended) rather than range-checked.
  ##
  ## The body refers to the `arg` parameter itself instead of relying on a
  ## variable named `x` being visible at the call site, so the operand may
  ## have any name. Existing callers that pass `x` behave exactly as before.
  when sizeof(arg) <= 4:
    when arg is SomeSignedInt:
      impl(cast[uint32](arg.int32))
    else:
      impl(arg.uint32)
  else:
    when arg is SomeSignedInt:
      impl(cast[uint64](arg.int64))
    else:
      impl(arg.uint64)
|
||||
|
||||
# Bit-for-bit reinterpretation of each signed integer type as the unsigned
# type of the same width. `cast` is used (not a conversion), so negative
# values map onto their two's-complement pattern without a range check.

template toUnsigned*(x: int8): uint8 =
  cast[uint8](x)

template toUnsigned*(x: int16): uint16 =
  cast[uint16](x)

template toUnsigned*(x: int32): uint32 =
  cast[uint32](x)

template toUnsigned*(x: int64): uint64 =
  cast[uint64](x)

template toUnsigned*(x: int): uint =
  cast[uint](x)
|
||||
@@ -9,17 +9,86 @@
|
||||
|
||||
## Contains the used algorithms for counting bits.
|
||||
|
||||
proc countBits32*(n: uint32): int {.compilerproc.} =
|
||||
from std/private/vmutils import forwardImpl, toUnsigned
|
||||
|
||||
|
||||
const
  # Compile-time switches selecting which popcount implementation is used.
  # `-d:noIntrinsicsBitOpts` disables every compiler intrinsic.
  useBuiltins* = not defined(noIntrinsicsBitOpts)
  noUndefined* = defined(noUndefinedBitOpts)
  # One flag per supported C compiler family; each also requires
  # `useBuiltins` to be true.
  useGCC_builtins* = useBuiltins and
    (defined(gcc) or defined(llvm_gcc) or defined(clang))
  useICC_builtins* = useBuiltins and defined(icc)
  useVCC_builtins* = useBuiltins and defined(vcc)
|
||||
|
||||
template countBitsImpl(n: uint32): int =
|
||||
# generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||
var v = uint32(n)
|
||||
v = v - ((v shr 1'u32) and 0x55555555'u32)
|
||||
v = (v and 0x33333333'u32) + ((v shr 2'u32) and 0x33333333'u32)
|
||||
result = (((v + (v shr 4'u32) and 0xF0F0F0F'u32) * 0x1010101'u32) shr 24'u32).int
|
||||
(((v + (v shr 4'u32) and 0xF0F0F0F'u32) * 0x1010101'u32) shr 24'u32).int
|
||||
|
||||
proc countBits64*(n: uint64): int {.compilerproc, inline.} =
|
||||
template countBitsImpl(n: uint64): int =
|
||||
# generic formula is from: https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
|
||||
var v = uint64(n)
|
||||
v = v - ((v shr 1'u64) and 0x5555555555555555'u64)
|
||||
v = (v and 0x3333333333333333'u64) + ((v shr 2'u64) and 0x3333333333333333'u64)
|
||||
v = (v + (v shr 4'u64) and 0x0F0F0F0F0F0F0F0F'u64)
|
||||
result = ((v * 0x0101010101010101'u64) shr 56'u64).int
|
||||
((v * 0x0101010101010101'u64) shr 56'u64).int
|
||||
|
||||
|
||||
when useGCC_builtins:
  # GCC/Clang intrinsics returning the number of 1-bits in the operand,
  # one for `cuint` and one for `culonglong`.
  proc builtin_popcount(x: cuint): cint {.
      importc: "__builtin_popcount", cdecl.}
  proc builtin_popcountll(x: culonglong): cint {.
      importc: "__builtin_popcountll", cdecl.}

elif useVCC_builtins:
  # MSVC intrinsics from <intrin.h>: population count (number of one bits)
  # of a 16-, 32-, or 64-bit unsigned integer.
  func builtin_popcnt16(a2: uint16): uint16 {.
      importc: "__popcnt16", header: "<intrin.h>".}
  func builtin_popcnt32(a2: uint32): uint32 {.
      importc: "__popcnt", header: "<intrin.h>".}
  func builtin_popcnt64(a2: uint64): uint64 {.
      importc: "__popcnt64", header: "<intrin.h>".}

elif useICC_builtins:
  # Intel compiler intrinsics from <immintrin.h>, see
  # http://fulla.fnal.gov/intel/compiler_c/main_cls/intref_cls/common/intref_allia_misc.htm
  # and https://software.intel.com/en-us/node/523362
  # Each returns the count of bits set to 1 in its integer argument.
  func builtin_popcnt32(a: cint): cint {.
      importc: "_popcnt", header: "<immintrin.h>".}
  func builtin_popcnt64(a: uint64): cint {.
      importc: "_popcnt64", header: "<immintrin.h>".}
|
||||
|
||||
|
||||
func countSetBitsImpl*(x: SomeInteger): int {.inline.} =
  ## Counts the set bits in an integer (also called `Hamming weight`:idx:).
  ##
  ## Signed operands are first reinterpreted as the unsigned type of the same
  ## width. In the compile-time VM the pure-Nim `countBitsImpl` is used; at
  ## run time the compiler's popcount intrinsic is used when one was selected
  ## by the `use*_builtins` flags, with `countBitsImpl` as the fallback.
  # TODO: figure out if ICC support _popcnt32/_popcnt64 on platform without POPCNT.
  # like GCC and MSVC
  when x is SomeSignedInt:
    # Shadow the parameter with its unsigned bit pattern so the widening
    # conversions below zero-extend instead of sign-extending.
    let x = x.toUnsigned
  when nimvm:
    result = forwardImpl(countBitsImpl, x)
  else:
    when useGCC_builtins:
      when sizeof(x) <= 4: result = builtin_popcount(x.cuint).int
      else: result = builtin_popcountll(x.culonglong).int
    elif useVCC_builtins:
      when sizeof(x) <= 2: result = builtin_popcnt16(x.uint16).int
      elif sizeof(x) <= 4: result = builtin_popcnt32(x.uint32).int
      # `sizeof(int) == 8` is spelled out here because no `arch64` constant
      # is declared in this module.
      elif sizeof(int) == 8: result = builtin_popcnt64(x.uint64).int
      # 32-bit target: count the low and high halves separately.
      else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).uint32).int +
                     builtin_popcnt32((x.uint64 shr 32'u64).uint32).int
    elif useICC_builtins:
      # NOTE(review): `x.cint` is a value conversion of an unsigned operand;
      # confirm it cannot trip a range check when the top bit is set.
      when sizeof(x) <= 4: result = builtin_popcnt32(x.cint).int
      elif sizeof(int) == 8: result = builtin_popcnt64(x.uint64).int
      else: result = builtin_popcnt32((x.uint64 and 0xFFFFFFFF'u64).cint).int +
                     builtin_popcnt32((x.uint64 shr 32'u64).cint).int
    else:
      when sizeof(x) <= 4: result = countBitsImpl(x.uint32)
      else: result = countBitsImpl(x.uint64)
|
||||
|
||||
proc countBits32*(n: uint32): int {.compilerproc, inline.} =
  ## Number of set bits in `n`; a thin wrapper that delegates to
  ## `countSetBitsImpl`.
  countSetBitsImpl(n)
|
||||
|
||||
proc countBits64*(n: uint64): int {.compilerproc, inline.} =
  ## Number of set bits in `n`; a thin wrapper that delegates to
  ## `countSetBitsImpl`.
  countSetBitsImpl(n)
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
type
|
||||
NimSet = array[0..4*2048-1, uint8]
|
||||
|
||||
# bitops can't be imported here, therefore the code duplication.
|
||||
|
||||
proc cardSet(s: NimSet, len: int): int {.compilerproc, inline.} =
|
||||
var i = 0
|
||||
|
||||
Reference in New Issue
Block a user