Scan until next special char (", \, \0, \c, \L) and append that slice once. (#25498)

Benchmark comparison (-d:danger --mm:arc --debugger:native -d:useMalloc,
  OpenAI file benchmark, 5 runs):

- Before: 0.196674934, 0.189423191, 0.198763300, 0.197125584, 0.205015032
- After: 0.182827130, 0.183330852, 0.174878542, 0.174360811, 0.181704921
  - Median before: 0.197125584s
  - Median after: 0.181704921s
  - Improvement: 7.82% lower median run time

  Callgrind comparison (same build flags):

  - Total Ir before: 3,219,477,120
  - Total Ir after: 2,449,556,167
  - Total Ir reduction: 23.91%

  parseString hotspot:

  - Before: 1,343,343,723 Ir
  - After: 573,423,735 Ir
  - Reduction: 57.31%

(cherry picked from commit f4dd00c4cc)
This commit is contained in:
Antonis Geralis
2026-05-28 00:31:39 +03:00
committed by narimiran
parent d6f60ceb61
commit c17a355923

View File

@@ -175,23 +175,48 @@ proc parseEscapedUTF16*(buf: cstring, pos: var int): int =
else:
return -1
proc addSpan(dst: var string; src: string; startPos, endPos: int) {.inline.} =
  ## Appends the half-open slice `src[startPos ..< endPos]` to `dst`,
  ## growing `dst` once for the whole slice instead of once per character.
  ##
  ## * `dst`      - string that receives the characters.
  ## * `src`      - string the characters are read from.
  ## * `startPos` - index of the first character to copy.
  ## * `endPos`   - index one past the last character to copy.
  ##
  ## Nothing happens when `endPos - startPos <= 0` (empty or inverted span).
  let n = endPos - startPos
  if n <= 0:
    return
  # The raw block copy below indexes only the first byte of each buffer, so
  # an over-long span would not be caught there. Validate the whole range up
  # front so both copy paths reject bad input the same way (this check is
  # compiled out when assertions are disabled).
  assert startPos >= 0 and endPos <= src.len, "addSpan: span out of range"
  let old = dst.len
  # Single resize; the new tail is filled in by one of the copy paths below.
  dst.setLen old + n
  template impl =
    # Portable character-by-character copy.
    for i in 0..<n:
      dst[old + i] = src[startPos + i]
  when nimvm:
    # Compile-time evaluation: use the portable loop.
    impl
  else:
    when defined(js) or defined(nimscript):
      # Backends where the raw memory copy is not used.
      impl
    else:
      {.noSideEffect.}:
        # Native targets: copy the whole span in one call.
        copyMem dst[old].addr, src[startPos].unsafeAddr, n
proc parseString(my: var JsonParser): TokKind =
result = tkString
var pos = my.bufpos + 1
var spanStart = pos
if my.rawStringLiterals:
add(my.a, '"')
while true:
case my.buf[pos]
of '\0':
my.err = errQuoteExpected
my.err = errInvalidToken
addSpan(my.a, my.buf, spanStart, pos)
result = tkError
break
of '"':
addSpan(my.a, my.buf, spanStart, pos)
if my.rawStringLiterals:
add(my.a, '"')
inc(pos)
break
of '\\':
addSpan(my.a, my.buf, spanStart, pos)
if my.rawStringLiterals:
add(my.a, '\\')
case my.buf[pos+1]
@@ -251,14 +276,18 @@ proc parseString(my: var JsonParser): TokKind =
# don't bother with the error
add(my.a, my.buf[pos])
inc(pos)
spanStart = pos
of '\c':
addSpan(my.a, my.buf, spanStart, pos)
pos = lexbase.handleCR(my, pos)
add(my.a, '\c')
spanStart = pos
of '\L':
addSpan(my.a, my.buf, spanStart, pos)
pos = lexbase.handleLF(my, pos)
add(my.a, '\L')
spanStart = pos
else:
add(my.a, my.buf[pos])
inc(pos)
my.bufpos = pos # store back