From f7bdec6f0de35ba1e92de7b3bf04ebca98f48c59 Mon Sep 17 00:00:00 2001 From: ringabout <43030857+ringabout@users.noreply.github.com> Date: Wed, 6 Dec 2023 14:25:34 +0000 Subject: [PATCH] progress --- compiler/ccgexprs.nim | 5 ++++- compiler/ccgliterals.nim | 43 ++++++++++++++++++++++++++++++++++++++++ compiler/ccgtypes.nim | 3 +++ lib/system.nim | 11 ++++++++-- lib/system/assign.nim | 12 +++++++++-- lib/system/deepcopy.nim | 6 +++++- lib/system/strs_v3.nim | 37 ++++++++++++++++++---------------- 7 files changed, 94 insertions(+), 23 deletions(-) diff --git a/compiler/ccgexprs.nim b/compiler/ccgexprs.nim index 2612c5c121..fad963af41 100644 --- a/compiler/ccgexprs.nim +++ b/compiler/ccgexprs.nim @@ -3437,7 +3437,10 @@ proc genBracedInit(p: BProc, n: PNode; isConst: bool; optionalType: PType; resul genConstObjConstr(p, n, isConst, result) of tyString, tyCstring: if optSeqDestructors in p.config.globalOptions and n.kind != nkNilLit and ty == tyString: - genStringLiteralV2Const(p.module, n, isConst, result) + if p.config.isDefined("nimSeqsV3"): + genStringLiteralV3Const(p.module, n, isConst, result) + else: + genStringLiteralV2Const(p.module, n, isConst, result) else: var d: TLoc = initLocExpr(p, n) result.add rdLoc(d) diff --git a/compiler/ccgliterals.nim b/compiler/ccgliterals.nim index cbef6771f6..cdc4348fac 100644 --- a/compiler/ccgliterals.nim +++ b/compiler/ccgliterals.nim @@ -94,6 +94,44 @@ proc genStringLiteralV2Const(m: BModule; n: PNode; isConst: bool; result: var Ro pureLit = m.tmpBase & rope(id) result.addf "{$1, (NimStrPayload*)&$2}", [rope(n.strVal.len), pureLit] +# ------ Version 3: destructor based strings and seqs ----------------------- +# strings are enhanced by interned strings + +proc genStringLiteralDataOnlyV3(m: BModule, s: string; result: Rope; isConst: bool) = + m.s[cfsStrData].addf("static $4 NIM_CHAR $1[$2] = $3;$n", + [result, rope(s.len), makeCString(s), + rope(if isConst: "const" else: "")]) + +proc genStringLiteralV3(m: BModule; n: PNode; isConst: bool; result: var Rope) = + let id = nodeTableTestOrSet(m.dataCache, n, m.labels) + if id == m.labels: + let pureLit = getTempName(m) + genStringLiteralDataOnlyV3(m, n.strVal, pureLit, isConst) + let tmp = getTempName(m) + result.add tmp + cgsym(m, "NimStringV3") + # string literal not found in the cache: + m.s[cfsStrData].addf("static $4 NimStringV3 $1 = {$2, &$3};$n", + [tmp, rope(n.strVal.len), pureLit, rope(if isConst: "const" else: "")]) + else: + let tmp = getTempName(m) + result.add tmp + m.s[cfsStrData].addf("static $4 NimStringV3 $1 = {$2, &$3};$n", + [tmp, rope(n.strVal.len), m.tmpBase & rope(id), + rope(if isConst: "const" else: "")]) + +proc genStringLiteralV3Const(m: BModule; n: PNode; isConst: bool; result: var Rope) = + let id = nodeTableTestOrSet(m.dataCache, n, m.labels) + var pureLit: Rope + if id == m.labels: + pureLit = getTempName(m) + cgsym(m, "NimStringV3") + # string literal not found in the cache: + genStringLiteralDataOnlyV3(m, n.strVal, pureLit, isConst) + else: + pureLit = m.tmpBase & rope(id) + result.addf "{$1, &$2}", [rope(n.strVal.len), pureLit] + # ------ Version selector --------------------------------------------------- proc genStringLiteralDataOnly(m: BModule; s: string; info: TLineInfo; @@ -104,6 +142,10 @@ proc genStringLiteralDataOnly(m: BModule; s: string; info: TLineInfo; let tmp = getTempName(m) genStringLiteralDataOnlyV2(m, s, tmp, isConst) result.add tmp + of 3: + let tmp = getTempName(m) + genStringLiteralDataOnlyV3(m, s, tmp, isConst) + result.add tmp else: localError(m.config, info, "cannot determine how to produce code for string literal") @@ -114,5 +156,6 @@ proc genStringLiteral(m: BModule; n: PNode; result: var Rope) = case detectStrVersion(m) of 0, 1: genStringLiteralV1(m, n, result) of 2: genStringLiteralV2(m, n, isConst = true, result) + of 3: genStringLiteralV3(m, n, isConst = true, result) else: localError(m.config, n.info, "cannot determine how to produce code for string literal") diff --git a/compiler/ccgtypes.nim b/compiler/ccgtypes.nim index 462b08a438..175be7951f 100644 --- a/compiler/ccgtypes.nim +++ b/compiler/ccgtypes.nim @@ -311,6 +311,9 @@ proc getSimpleTypeDesc(m: BModule; typ: PType): Rope = result = typeNameOrLiteral(m, typ, "void*") of tyString: case detectStrVersion(m) + of 3: + cgsym(m, "NimStringV3") + result = typeNameOrLiteral(m, typ, "NimStringV3") of 2: cgsym(m, "NimStrPayload") cgsym(m, "NimStringV2") diff --git a/lib/system.nim b/lib/system.nim index 8c17afaa02..10481768aa 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -1616,7 +1616,10 @@ when not defined(js) and defined(nimV2): proc supportsCopyMem(t: typedesc): bool {.magic: "TypeTrait".} when notJSnotNims and defined(nimSeqsV2): - include "system/strs_v2" + when defined(nimSeqsV3): + include "system/strs_v3" + else: + include "system/strs_v2" include "system/seqs_v2" when not defined(js): @@ -1679,7 +1682,11 @@ when not defined(js): result = newString(len) else: result = newStringOfCap(len) - when defined(nimSeqsV2): + when defined(nimSeqsV3): + let s = cast[ptr NimStringV3](addr result) + if len > 0: + s.len = len + elif defined(nimSeqsV2): let s = cast[ptr NimStringV2](addr result) if len > 0: s.len = len diff --git a/lib/system/assign.nim b/lib/system/assign.nim index 9f4cbc0feb..8be9c324d5 100644 --- a/lib/system/assign.nim +++ b/lib/system/assign.nim @@ -61,7 +61,11 @@ proc genericAssignAux(dest, src: pointer, mt: PNimType, shallow: bool) = sysAssert(mt != nil, "genericAssignAux 2") case mt.kind of tyString: - when defined(nimSeqsV2): + when defined(nimSeqsV3): + var x = cast[ptr NimStringV3](dest) + var s2 = cast[ptr NimStringV3](s)[] + nimAsgnStrV2(x[], s2) + elif defined(nimSeqsV2): var x = cast[ptr NimStringV2](dest) var s2 = cast[ptr NimStringV2](s)[] nimAsgnStrV2(x[], s2) @@ -244,7 +248,11 @@ proc genericReset(dest: pointer, mt: PNimType) = of tyRef: unsureAsgnRef(cast[PPointer](dest), nil) of tyString: - when defined(nimSeqsV2): + when defined(nimSeqsV3): + var s = cast[ptr NimStringV3](dest) + frees(s[]) + zeroMem(dest, mt.size) + elif defined(nimSeqsV2): var s = cast[ptr NimStringV2](dest) frees(s[]) zeroMem(dest, mt.size) diff --git a/lib/system/deepcopy.nim b/lib/system/deepcopy.nim index 72d35f5186..040e3265db 100644 --- a/lib/system/deepcopy.nim +++ b/lib/system/deepcopy.nim @@ -90,7 +90,11 @@ proc genericDeepCopyAux(dest, src: pointer, mt: PNimType; tab: var PtrTable) = sysAssert(mt != nil, "genericDeepCopyAux 2") case mt.kind of tyString: - when defined(nimSeqsV2): + when defined(nimSeqsV3): + var x = cast[ptr NimStringV3](dest) + var s2 = cast[ptr NimStringV3](s)[] + nimAsgnStrV2(x[], s2) + elif defined(nimSeqsV2): var x = cast[ptr NimStringV2](dest) var s2 = cast[ptr NimStringV2](s)[] nimAsgnStrV2(x[], s2) diff --git a/lib/system/strs_v3.nim b/lib/system/strs_v3.nim index 28569cb9e2..2951bfefe6 100644 --- a/lib/system/strs_v3.nim +++ b/lib/system/strs_v3.nim @@ -11,25 +11,29 @@ type NimStrPayloadBase = object + ## cap lives at the negative offset of p: p - wordSize cap: int - NimStrPayload {.core.} = object - data: UncheckedArray[char] - NimStringV3 {.core.} = object rawlen: int ## the lowest bit is used to indict whether it's a const or intern string + ## TODO: would it be better to use distinct? p: ptr UncheckedArray[char] ## can be nil if len == 0. - ## cap lives at the negative offset + ## cap lives at the negative offset of p: p - wordSize ## non-zero terminated const nimStrVersion {.core.} = 3 template isLiteral(s): bool = (s.rawlen and 1) == 1 + +template head(p: pointer): pointer = + cast[pointer](cast[int](p) -% sizeof(NimStrPayloadBase)) + + template cap(p: pointer): int = - cast[ptr NimStrPayloadBase](cast[int](p) -% sizeof(NimStrPayloadBase))[].cap + cast[ptr NimStrPayloadBase](head(p))[].cap template `cap=`(p: pointer, size: int) = - cast[ptr NimStrPayloadBase](cast[int](p) -% sizeof(NimStrPayloadBase))[].cap = size + cast[ptr NimStrPayloadBase](head(p))[].cap = size template len(s: NimStringV3): int = s.rawlen shr 1 @@ -41,24 +45,25 @@ proc markIntern(s: var NimStringV3): bool = s.rawlen = s.rawlen or 1 result = not isLiteral(s) -proc unsafeUnmarkIntern(s: NimStringV3) = +proc unsafeUnmarkIntern(s: var NimStringV3) = + s.rawlen = s.rawlen and (not 1) # unmark? when compileOption("threads"): - deallocShared(s.p -% sizeof(NimStrPayloadBase)) + deallocShared(head(s.p)) else: - dealloc(s.p -% sizeof(NimStrPayloadBase)) + dealloc(head(s.p)) template contentSize(cap): int = cap + sizeof(NimStrPayloadBase) template frees(s) = if not isLiteral(s): when compileOption("threads"): - deallocShared(s.p -% sizeof(NimStrPayloadBase)) + deallocShared(head(s.p)) else: - dealloc(s.p -% sizeof(NimStrPayloadBase)) + dealloc(head(s.p)) template allocPayload(newLen: int): ptr UncheckedArray[char] = when compileOption("threads"): - cast[ptr UncheckedArray[char]](allocShared(contentSize(newLen) +! sizeof(NimStrPayloadBase))) + cast[ptr UncheckedArray[char]](allocShared(contentSize(newLen)) +! sizeof(NimStrPayloadBase)) else: cast[ptr UncheckedArray[char]](alloc(contentSize(newLen)) +! sizeof(NimStrPayloadBase)) @@ -99,10 +104,9 @@ proc prepareAdd(s: var NimStringV3; addLen: int) {.compilerRtl.} = let oldCap = s.p.cap if newLen > oldCap: let newCap = max(newLen, resize(oldCap)) - s.p = reallocPayload(s.p -% sizeof(NimStrPayloadBase), newCap) + s.p = reallocPayload(head(s.p), newCap) s.p.cap = newCap if newLen < newCap: - ## TODO: be careful with off by one zeroMem(cast[pointer](addr s.p[newLen]), newCap - newLen) proc nimAddCharV1(s: var NimStringV3; c: char) {.compilerRtl, inl.} = @@ -139,6 +143,7 @@ proc appendString(dest: var NimStringV3; src: NimStringV3) {.compilerproc, inlin proc appendChar(dest: var NimStringV3; c: char) {.compilerproc, inline.} = dest.p[dest.len] = c + incRawLen(dest) proc rawNewString(space: int): NimStringV3 {.compilerproc.} = # this is also 'system.newStringOfCap'. @@ -169,15 +174,13 @@ proc setLengthStrV2(s: var NimStringV3, newLen: int) {.compilerRtl.} = copyMem(unsafeAddr s.p[0], unsafeAddr oldP[0], min(s.len, newLen)) if newLen > s.len: zeroMem(cast[pointer](addr s.p[s.len]), newLen - s.len) - # else: - # s.p.data[newLen] = '\0' else: zeroMem(cast[pointer](addr s.p[0]), newLen) elif newLen > s.len: let oldCap = s.p.cap if newLen > oldCap: let newCap = max(newLen, resize(oldCap)) - s.p = reallocPayload0(s.p -% sizeof(NimStrPayloadBase), oldCap, newCap) + s.p = reallocPayload0(head(s.p), oldCap, newCap) s.p.cap = newCap s.rawlen = toRawLen(newLen)