Mirror of https://github.com/nim-lang/Nim.git
ARC now capable of custom extra alignment. Ref, closure and seq support. (#15697)
(cherry picked from commit 0956a99537)
@@ -1278,11 +1278,11 @@ proc rawGenNew(p: BProc, a: var TLoc, sizeExpr: Rope; needsInit: bool) =
   if optTinyRtti in p.config.globalOptions:
     if needsInit:
-      b.r = ropecg(p.module, "($1) #nimNewObj($2)",
-                   [getTypeDesc(p.module, typ), sizeExpr])
+      b.r = ropecg(p.module, "($1) #nimNewObj($2, NIM_ALIGNOF($3))",
+                   [getTypeDesc(p.module, typ), sizeExpr, getTypeDesc(p.module, bt)])
     else:
-      b.r = ropecg(p.module, "($1) #nimNewObjUninit($2)",
-                   [getTypeDesc(p.module, typ), sizeExpr])
+      b.r = ropecg(p.module, "($1) #nimNewObjUninit($2, NIM_ALIGNOF($3))",
+                   [getTypeDesc(p.module, typ), sizeExpr, getTypeDesc(p.module, bt)])
     genAssignment(p, a, b, {})
   else:
     let ti = genTypeInfoV1(p.module, typ, a.lode.info)
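With tiny RTTI enabled, the allocation call emitted into the generated C now carries the alignment of the underlying object type; for a hypothetical object type Foo the format string above expands to roughly (Foo*) nimNewObj(sizeof(Foo), NIM_ALIGNOF(Foo)) (an illustrative expansion, not verbatim compiler output; $2 is the size expression and $3 the base type bt).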
@@ -2191,14 +2191,10 @@ proc genDestroy(p: BProc; n: PNode) =
     of tySequence:
       var a: TLoc
       initLocExpr(p, arg, a)
-      if optThreads in p.config.globalOptions:
-        linefmt(p, cpsStmts, "if ($1.p && !($1.p->cap & NIM_STRLIT_FLAG)) {$n" &
-          " #deallocShared($1.p);$n" &
-          "}$n", [rdLoc(a)])
-      else:
-        linefmt(p, cpsStmts, "if ($1.p && !($1.p->cap & NIM_STRLIT_FLAG)) {$n" &
-          " #dealloc($1.p);$n" &
-          "}$n", [rdLoc(a)])
+      linefmt(p, cpsStmts, "if ($1.p && !($1.p->cap & NIM_STRLIT_FLAG)) {$n" &
+        " #alignedDealloc($1.p, NIM_ALIGNOF($2));$n" &
+        "}$n",
+        [rdLoc(a), getTypeDesc(p.module, t.lastSon)])
     else: discard "nothing to do"
   else:
     let t = n[1].typ.skipTypes(abstractVar)
@@ -2217,7 +2213,7 @@ proc genDispose(p: BProc; n: PNode) =
       if elemType.destructor != nil:
         var destroyCall = newNodeI(nkCall, n.info)
         genStmts(p, destroyCall)
-      lineCg(p, cpsStmts, ["#nimRawDispose($#)", rdLoc(a)])
+      lineFmt(p, cpsStmts, "#nimRawDispose($1, NIM_ALIGNOF($2))", [rdLoc(a), getTypeDesc(p.module, elemType)])
     else:
       # ``nimRawDisposeVirtual`` calls the ``finalizer`` which is the same as the
       # destructor, but it uses the runtime type. Afterwards the memory is freed:
@@ -513,7 +513,9 @@ proc atomicRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
 
     if isFinal(elemType):
       addDestructorCall(c, elemType, actions, genDeref(x, nkDerefExpr))
-      actions.add callCodegenProc(c.g, "nimRawDispose", c.info, x)
+      var alignOf = genBuiltin(c.g, mAlignOf, "alignof", newNodeIT(nkType, c.info, elemType))
+      alignOf.typ = getSysType(c.g, c.info, tyInt)
+      actions.add callCodegenProc(c.g, "nimRawDispose", c.info, x, alignOf)
     else:
       addDestructorCall(c, elemType, newNodeI(nkStmtList, c.info), genDeref(x, nkDerefExpr))
       actions.add callCodegenProc(c.g, "nimDestroyAndDispose", c.info, x)
@@ -636,7 +638,9 @@ proc ownedRefOp(c: var TLiftCtx; t: PType; body, x, y: PNode) =
 
     if isFinal(elemType):
       addDestructorCall(c, elemType, actions, genDeref(x, nkDerefExpr))
-      actions.add callCodegenProc(c.g, "nimRawDispose", c.info, x)
+      var alignOf = genBuiltin(c.g, mAlignOf, "alignof", newNodeIT(nkType, c.info, elemType))
+      alignOf.typ = getSysType(c.g, c.info, tyInt)
+      actions.add callCodegenProc(c.g, "nimRawDispose", c.info, x, alignOf)
     else:
       addDestructorCall(c, elemType, newNodeI(nkStmtList, c.info), genDeref(x, nkDerefExpr))
       actions.add callCodegenProc(c.g, "nimDestroyAndDispose", c.info, x)
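In both lifted ops the change is the same: when the element type is final, the compiler now synthesizes an alignof(elemType) node (typed as int via getSysType) and passes it as an extra argument to nimRawDispose, so the runtime can undo the aligned header padding. The non-final branch keeps calling nimDestroyAndDispose, which instead reads the alignment from the v2 RTTI at run time (see the rti.align change further down).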
@@ -108,7 +108,7 @@ when not defined(gcDestructors):
   proc newSeq(typ: PNimType, len: int): pointer {.importCompilerProc.}
   proc objectInit(dest: pointer, typ: PNimType) {.importCompilerProc.}
 else:
-  proc nimNewObj(size: int): pointer {.importCompilerProc.}
+  proc nimNewObj(size, align: int): pointer {.importCompilerProc.}
   proc newSeqPayload(cap, elemSize, elemAlign: int): pointer {.importCompilerProc.}
   proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): pointer {.
     importCompilerProc.}
@@ -178,7 +178,7 @@ proc invokeNew*(x: Any) =
   ## performs ``new(x)``. `x` needs to represent a ``ref``.
   assert x.rawType.kind == tyRef
   when defined(gcDestructors):
-    cast[ppointer](x.value)[] = nimNewObj(x.rawType.base.size)
+    cast[ppointer](x.value)[] = nimNewObj(x.rawType.base.size, x.rawType.base.align)
   else:
     var z = newObj(x.rawType, x.rawType.base.size)
     genericAssign(x.value, addr(z), x.rawType)
@@ -75,14 +75,13 @@ when defined(nimArcDebug):
 elif defined(nimArcIds):
   var gRefId: int
 
-proc nimNewObj(size: int): pointer {.compilerRtl.} =
-  let s = size + sizeof(RefHeader)
+proc nimNewObj(size, alignment: int): pointer {.compilerRtl.} =
+  let hdrSize = align(sizeof(RefHeader), alignment)
+  let s = size + hdrSize
   when defined(nimscript):
     discard
-  elif compileOption("threads"):
-    result = allocShared0(s) +! sizeof(RefHeader)
   else:
-    result = alloc0(s) +! sizeof(RefHeader)
+    result = alignedAlloc0(s, alignment) +! hdrSize
   when defined(nimArcDebug) or defined(nimArcIds):
     head(result).refId = gRefId
     atomicInc gRefId
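What the new hdrSize computation buys: the RefHeader that precedes every ARC cell is padded up to the payload's alignment, so the user data right after it lands on an aligned address. A minimal sketch of the rounding arithmetic (this align mirrors the runtime's power-of-two rounding helper; the concrete sizes are illustrative assumptions):

  # round `value` up to the next multiple of `alignment` (a power of two)
  proc align(value, alignment: int): int {.inline.} =
    (value + alignment - 1) and not (alignment - 1)

  # an 8-byte RefHeader in front of a 32-byte-aligned payload (e.g. an
  # AVX __m256d) grows to a full 32-byte slot:
  doAssert align(8, 32) == 32
  # for ordinary alignments nothing changes:
  doAssert align(8, 8) == 8
  doAssert align(16, 16) == 16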
@@ -92,20 +91,18 @@ proc nimNewObj(size: int): pointer {.compilerRtl.} =
   when traceCollector:
     cprintf("[Allocated] %p result: %p\n", result -! sizeof(RefHeader), result)
 
-proc nimNewObjUninit(size: int): pointer {.compilerRtl.} =
+proc nimNewObjUninit(size, alignment: int): pointer {.compilerRtl.} =
   # Same as 'newNewObj' but do not initialize the memory to zero.
   # The codegen proved for us that this is not necessary.
-  let s = size + sizeof(RefHeader)
+  let hdrSize = align(sizeof(RefHeader), alignment)
+  let s = size + hdrSize
   when defined(nimscript):
     discard
-  elif compileOption("threads"):
-    var orig = cast[ptr RefHeader](allocShared(s))
   else:
-    var orig = cast[ptr RefHeader](alloc(s))
-  orig.rc = 0
+    result = cast[ptr RefHeader](alignedAlloc0(s, alignment) +! hdrSize)
+  head(result).rc = 0
   when defined(gcOrc):
-    orig.rootIdx = 0
-  result = orig +! sizeof(RefHeader)
+    head(result).rootIdx = 0
   when defined(nimArcDebug):
     head(result).refId = gRefId
     atomicInc gRefId
@@ -147,7 +144,7 @@ when not defined(nimscript) and defined(nimArcDebug):
   else:
     result = 0
 
-proc nimRawDispose(p: pointer) {.compilerRtl.} =
+proc nimRawDispose(p: pointer, alignment: int) {.compilerRtl.} =
   when not defined(nimscript):
     when traceCollector:
       cprintf("[Freed] %p\n", p -! sizeof(RefHeader))
@@ -155,27 +152,21 @@ proc nimRawDispose(p: pointer) {.compilerRtl.} =
       if head(p).rc >= rcIncrement:
         cstderr.rawWrite "[FATAL] dangling references exist\n"
         quit 1
 
    when defined(gcOrc) and defined(nimArcDebug):
      if (head(p).rc and 0b100) != 0:
        cstderr.rawWrite "[FATAL] cycle root freed\n"
        quit 1
 
    when defined(nimArcDebug):
      # we do NOT really free the memory here in order to reliably detect use-after-frees
      if freedCells.data == nil: init(freedCells)
      freedCells.incl head(p)
-    elif compileOption("threads"):
-      deallocShared(p -! sizeof(RefHeader))
     else:
-      dealloc(p -! sizeof(RefHeader))
+      let hdrSize = align(sizeof(RefHeader), alignment)
+      alignedDealloc(p -! hdrSize, alignment)
 
-template dispose*[T](x: owned(ref T)) = nimRawDispose(cast[pointer](x))
+template dispose*[T](x: owned(ref T)) = nimRawDispose(cast[pointer](x), T.alignOf)
 #proc dispose*(x: pointer) = nimRawDispose(x)
 
 proc nimDestroyAndDispose(p: pointer) {.compilerRtl, raises: [].} =
-  let d = cast[ptr PNimTypeV2](p)[].destructor
-  if d != nil: cast[DestructorProc](d)(p)
+  let rti = cast[ptr PNimTypeV2](p)
+  if rti.destructor != nil:
+    cast[DestructorProc](rti.destructor)(p)
   when false:
     cstderr.rawWrite cast[ptr PNimTypeV2](p)[].name
     cstderr.rawWrite "\n"
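Note the symmetry with nimNewObj/nimNewObjUninit above: both allocation and disposal recompute hdrSize = align(sizeof(RefHeader), alignment), so the pointer handed to alignedDealloc is exactly the base that alignedAlloc0 returned. The dispose template knows the alignment statically, hence T.alignOf; nimDestroyAndDispose only has a type-erased pointer, so it takes the alignment from the v2 type info (rti.align) instead.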
@@ -183,7 +174,7 @@ proc nimDestroyAndDispose(p: pointer) {.compilerRtl, raises: [].} =
       cstderr.rawWrite "bah, nil\n"
     else:
       cstderr.rawWrite "has destructor!\n"
-  nimRawDispose(p)
+  nimRawDispose(p, rti.align)
 
 when defined(gcOrc):
   when defined(nimThinout):
@@ -216,7 +207,7 @@ proc GC_unref*[T](x: ref T) =
   if nimDecRefIsLast(cast[pointer](x)):
     # XXX this does NOT work for virtual destructors!
     `=destroy`(x[])
-    nimRawDispose(cast[pointer](x))
+    nimRawDispose(cast[pointer](x), T.alignOf)
 
 proc GC_ref*[T](x: ref T) =
   ## New runtime only supports this operation for 'ref T'.
@@ -15,7 +15,11 @@ const
   PageSize = 1 shl PageShift
   PageMask = PageSize-1
 
-  MemAlign = 16 # also minimal allocatable memory block
+  MemAlign = # also minimal allocatable memory block
+    when defined(useMalloc):
+      when defined(amd64): 16
+      else: 8
+    else: 16
 
   BitsPerPage = PageSize div MemAlign
   UnitsPerPage = BitsPerPage div (sizeof(int)*8)
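This split matters for the fast path of the aligned allocators added below: any request with align <= MemAlign can be served by the plain allocator unchanged. When Nim sits on top of the system malloc it can only assume what C guarantees, alignment suitable for max_align_t, which this change takes to be 16 on amd64 and, conservatively, 8 everywhere else; Nim's own allocator keeps the 16-byte guarantee.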
@@ -163,7 +163,7 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) =
     when defined(nimSeqsV2):
       let typ = if mt.base.kind == tyObject: cast[PNimType](cast[ptr PNimTypeV2](s2)[].typeInfoV1)
                 else: mt.base
-      let z = nimNewObj(typ.size)
+      let z = nimNewObj(typ.size, typ.align)
       cast[PPointer](dest)[] = z
     else:
       # this version should work for any other GC:
@@ -304,6 +304,87 @@ when hasAlloc and not defined(js):
     ## or other memory may be corrupted.
     deallocShared(p)
 
+  include bitmasks
+
+  template `+!`(p: pointer, s: SomeInteger): pointer =
+    cast[pointer](cast[int](p) +% int(s))
+
+  template `-!`(p: pointer, s: SomeInteger): pointer =
+    cast[pointer](cast[int](p) -% int(s))
+
+  proc alignedAlloc(size, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = allocShared(size)
+      else:
+        result = alloc(size)
+    else:
+      # allocate (size + align - 1) necessary for alignment,
+      # plus 2 bytes to store offset
+      when compileOption("threads"):
+        let base = allocShared(size + align - 1 + sizeof(uint16))
+      else:
+        let base = alloc(size + align - 1 + sizeof(uint16))
+      # memory layout: padding + offset (2 bytes) + user_data
+      # in order to deallocate: read offset at user_data - 2 bytes,
+      # then deallocate user_data - offset
+      let offset = align - (cast[int](base) and (align - 1))
+      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
+      result = base +! offset
+
+  proc alignedAlloc0(size, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = allocShared0(size)
+      else:
+        result = alloc0(size)
+    else:
+      # see comments for alignedAlloc
+      when compileOption("threads"):
+        let base = allocShared0(size + align - 1 + sizeof(uint16))
+      else:
+        let base = alloc0(size + align - 1 + sizeof(uint16))
+      let offset = align - (cast[int](base) and (align - 1))
+      cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
+      result = base +! offset
+
+  proc alignedDealloc(p: pointer, align: int) {.compilerproc.} =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        deallocShared(p)
+      else:
+        dealloc(p)
+    else:
+      # read offset at p - 2 bytes, then deallocate (p - offset) pointer
+      let offset = cast[ptr uint16](p -! sizeof(uint16))[]
+      when compileOption("threads"):
+        deallocShared(p -! offset)
+      else:
+        dealloc(p -! offset)
+
+  proc alignedRealloc(p: pointer, oldSize, newSize, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = reallocShared(p, newSize)
+      else:
+        result = realloc(p, newSize)
+    else:
+      result = alignedAlloc(newSize, align)
+      copyMem(result, p, oldSize)
+      alignedDealloc(p, align)
+
+  proc alignedRealloc0(p: pointer, oldSize, newSize, align: Natural): pointer =
+    if align <= MemAlign:
+      when compileOption("threads"):
+        result = reallocShared0(p, oldSize, newSize)
+      else:
+        result = realloc0(p, oldSize, newSize)
+    else:
+      result = alignedAlloc(newSize, align)
+      copyMem(result, p, oldSize)
+      zeroMem(result +! oldSize, newSize - oldSize)
+      alignedDealloc(p, align)
+
 {.pop.}
 
 # GC interface:
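The stored-offset scheme above is easy to validate in isolation. Below is a minimal standalone sketch that re-implements the same trick on top of plain alloc/dealloc; myAlignedAlloc/myAlignedDealloc are hypothetical names used for illustration, the runtime procs above are the real implementation. Like the runtime, it relies on the base allocator already returning MemAlign-aligned blocks, which keeps the stored uint16 inside the over-allocated region:

  template `+!`(p: pointer, s: int): pointer = cast[pointer](cast[int](p) + s)
  template `-!`(p: pointer, s: int): pointer = cast[pointer](cast[int](p) - s)

  proc myAlignedAlloc(size, align: Natural): pointer =
    # over-allocate: worst-case padding plus 2 bytes for the offset
    let base = alloc(size + align - 1 + sizeof(uint16))
    # distance from base to the next align boundary (always in 1..align)
    let offset = align - (cast[int](base) and (align - 1))
    # stash the offset in the 2 bytes just before the user pointer
    cast[ptr uint16](base +! (offset - sizeof(uint16)))[] = uint16(offset)
    result = base +! offset

  proc myAlignedDealloc(p: pointer, align: Natural) =
    # read the offset back and free the original base pointer
    let offset = int(cast[ptr uint16](p -! sizeof(uint16))[])
    dealloc(p -! offset)

  let p = myAlignedAlloc(64, 256)
  doAssert (cast[uint](p) and 255) == 0  # 256-byte aligned
  myAlignedDealloc(p, 256)

This also shows why alignedRealloc cannot simply call realloc in its slow path: the base pointer's misalignment, and with it the stored offset, can change when the block moves, so it allocates fresh, copies oldSize bytes, and frees the old block.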
@@ -88,7 +88,7 @@ proc free(s: Cell; desc: PNimTypeV2) {.inline.} =
     else:
       cstderr.rawWrite "has dispose!\n"
 
-  nimRawDispose(p)
+  nimRawDispose(p, desc.align)
 
 proc nimTraceRef(q: pointer; desc: PNimTypeV2; env: pointer) {.compilerRtl, inline.} =
   let p = cast[ptr pointer](q)
@@ -35,10 +35,7 @@ proc newSeqPayload(cap, elemSize, elemAlign: int): pointer {.compilerRtl, raises
   # we have to use type erasure here as Nim does not support generic
   # compilerProcs. Oh well, this will all be inlined anyway.
   if cap > 0:
-    when compileOption("threads"):
-      var p = cast[ptr NimSeqPayloadBase](allocShared0(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize))
-    else:
-      var p = cast[ptr NimSeqPayloadBase](alloc0(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize))
+    var p = cast[ptr NimSeqPayloadBase](alignedAlloc0(align(sizeof(NimSeqPayloadBase), elemAlign) + cap * elemSize, elemAlign))
     p.cap = cap
     result = p
   else:
@@ -65,20 +62,14 @@ proc prepareSeqAdd(len: int; p: pointer; addlen, elemSize, elemAlign: int): poin
     let oldCap = p.cap and not strlitFlag
     let newCap = max(resize(oldCap), len+addlen)
     if (p.cap and strlitFlag) == strlitFlag:
-      when compileOption("threads"):
-        var q = cast[ptr NimSeqPayloadBase](allocShared0(headerSize + elemSize * newCap))
-      else:
-        var q = cast[ptr NimSeqPayloadBase](alloc0(headerSize + elemSize * newCap))
+      var q = cast[ptr NimSeqPayloadBase](alignedAlloc0(headerSize + elemSize * newCap, elemAlign))
       copyMem(q +! headerSize, p +! headerSize, len * elemSize)
       q.cap = newCap
       result = q
     else:
       let oldSize = headerSize + elemSize * oldCap
       let newSize = headerSize + elemSize * newCap
-      when compileOption("threads"):
-        var q = cast[ptr NimSeqPayloadBase](reallocShared0(p, oldSize, newSize))
-      else:
-        var q = cast[ptr NimSeqPayloadBase](realloc0(p, oldSize, newSize))
+      var q = cast[ptr NimSeqPayloadBase](alignedRealloc0(p, oldSize, newSize, elemAlign))
       q.cap = newCap
       result = q
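Since headerSize is already align(sizeof(NimSeqPayloadBase), elemAlign) and the whole payload now comes from alignedAlloc0 with elemAlign, element 0 starts exactly on an elemAlign boundary. A small illustrative computation (the 16-byte header size is an assumption for the example):

  proc align(value, alignment: int): int {.inline.} =
    (value + alignment - 1) and not (alignment - 1)

  # assume sizeof(NimSeqPayloadBase) == 16 and 32-byte elements (__m256d):
  let headerSize = align(16, 32)       # header slot padded to 32 bytes
  let payloadSize = headerSize + 4*32  # a seq of cap 4 occupies 160 bytes
  doAssert headerSize == 32 and payloadSize == 160
  # alignedAlloc0(payloadSize, 32) aligns the payload base, so element 0
  # at base + headerSize is 32-byte aligned too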
tests/arc/thard_alignment.nim (new file, 146 lines)
@@ -0,0 +1,146 @@
+discard """
+  disabled: "arm64"
+  cmd: "nim c --gc:arc $file"
+  output: "y"
+"""
+
+{.passC: "-march=native".}
+
+proc isAlignedCheck(p: pointer, alignment: int) =
+  doAssert (cast[uint](p) and uint(alignment - 1)) == 0
+
+proc isAlignedCheck[T](p: ref T, alignment: int) =
+  isAlignedCheck(cast[pointer](p), alignment)
+
+type
+  m256d {.importc: "__m256d", header: "immintrin.h".} = object
+
+proc set1(x: float): m256d {.importc: "_mm256_set1_pd", header: "immintrin.h".}
+func `+`(a,b: m256d): m256d {.importc: "_mm256_add_pd", header: "immintrin.h".}
+proc `$`(a: m256d): string =
+  result = $(cast[ptr float](a.unsafeAddr)[])
+
+
+var res: seq[seq[m256d]]
+
+for _ in 1..1000:
+  var x = newSeq[m256d](1)
+  x[0] = set1(1.0) # test if operation causes segfault
+  isAlignedCheck(x[0].addr, alignof(m256d))
+  res.add x
+
+var res2: seq[m256d]
+for i in 1..10000:
+  res2.setLen(res2.len + 1) # check if realloc works
+  isAlignedCheck(res2[0].addr, alignof(m256d))
+
+proc lambdaGen(a, b: float, z: ref m256d) : auto =
+  var x1 = new(m256d)
+  var x2 = new(m256d)
+  isAlignedCheck(x1, alignof(m256d))
+  isAlignedCheck(x2, alignof(m256d))
+  x1[] = set1(2.0 + a)
+  x2[] = set1(-23.0 - b)
+  let capturingLambda = proc(x: ref m256d): ref m256d =
+    var cc = new(m256d)
+    var bb = new(m256d)
+    isAlignedCheck(x1, alignof(m256d))
+    isAlignedCheck(x2, alignof(m256d))
+    isAlignedCheck(cc, alignof(m256d))
+    isAlignedCheck(bb, alignof(m256d))
+    isAlignedCheck(z, alignof(m256d))
+
+    cc[] = x1[] + x1[] + z[]
+    bb[] = x2[] + set1(12.5) + z[]
+
+    result = new(m256d)
+    isAlignedCheck(result, alignof(m256d))
+    result[] = cc[] + bb[] + x[]
+
+  return capturingLambda
+
+var xx = new(m256d)
+xx[] = set1(10)
+isAlignedCheck(xx, alignOf(m256d))
+
+let f1 = lambdaGen(2.0 , 2.221, xx)
+let f2 = lambdaGen(-1.226 , 3.5, xx)
+isAlignedCheck(f1(xx), alignOf(m256d))
+isAlignedCheck(f2(xx), alignOf(m256d))
+
+
+#-----------------------------------------------------------------------------
+
+type
+  MyAligned = object of RootObj
+    a{.align: 128.}: float
+
+
+var f: MyAligned
+isAlignedCheck(f.addr, MyAligned.alignOf)
+
+var fref = new(MyAligned)
+isAlignedCheck(fref, MyAligned.alignOf)
+
+var fs: seq[MyAligned]
+var fr: seq[RootRef]
+
+for i in 0..1000:
+  fs.add MyAligned()
+  isAlignedCheck(fs[^1].addr, MyAligned.alignOf)
+  fs[^1].a = i.float
+
+  fr.add new(MyAligned)
+  isAlignedCheck(fr[^1], MyAligned.alignOf)
+  ((ref MyAligned)fr[^1])[].a = i.float
+
+for i in 0..1000:
+  doAssert(fs[i].a == i.float)
+  doAssert(((ref MyAligned)fr[i]).a == i.float)
+
+
+proc lambdaTest2(a: MyAligned, z: ref MyAligned): auto =
+  var x1: MyAligned
+  x1.a = a.a + z.a
+  var x2: MyAligned
+  x2.a = a.a - z.a
+  let capturingLambda = proc(x: MyAligned): MyAligned =
+    var cc: MyAligned
+    var bb: MyAligned
+    isAlignedCheck(x1.addr, MyAligned.alignOf)
+    isAlignedCheck(x2.addr, MyAligned.alignOf)
+    isAlignedCheck(cc.addr, MyAligned.alignOf)
+    isAlignedCheck(bb.addr, MyAligned.alignOf)
+    isAlignedCheck(z, MyAligned.alignOf)
+
+    cc.a = x1.a + x1.a + z.a
+    bb.a = x2.a - z.a
+
+    isAlignedCheck(result.addr, MyAligned.alignOf)
+    result.a = cc.a + bb.a + x2.a
+
+  return capturingLambda
+
+
+let q1 = lambdaTest2(MyAligned(a: 1.0), (ref MyAligned)(a: 2.0))
+let q2 = lambdaTest2(MyAligned( a: -1.0), (ref MyAligned)(a: -2.0))
+
+isAlignedCheck(rawEnv(q1), MyAligned.alignOf)
+isAlignedCheck(rawEnv(q2), MyAligned.alignOf)
+discard q1(MyAligned(a: 1.0))
+discard q2(MyAligned(a: -1.0))
+
+
+#-----------------------------------------------------------------------------
+
+block:
+  var s: seq[seq[MyAligned]]
+  for len in 0..128:
+    s.add newSeq[MyAligned](len)
+    for i in 0..<len:
+      s[^1][i] = MyAligned(a: 1.0)
+
+    if len > 0:
+      isAlignedCheck(s[^1][0].addr, MyAligned.alignOf)
+
+echo "y"
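With the feature working, the old disabled placeholder test is deleted; the full test above supersedes it.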
@@ -1,17 +0,0 @@
-discard """
-  disabled: true
-"""
-
-# does not yet work
-
-{.passC: "-march=native".}
-
-type
-  m256d {.importc: "__m256d", header: "immintrin.h".} = object
-
-proc set1(x: float): m256d {.importc: "_mm256_set1_pd", header: "immintrin.h".}
-
-for _ in 1..1000:
-  var x = newSeq[m256d](1)
-  x[0] = set1(1.0) # test if operation causes segfault
-  doAssert (cast[uint](x[0].addr) and 31) == 0