Scan until next special char (", \, \0, \c, \L) and append that slice once. (#25498)

Benchmark comparison (-d:danger --mm:arc --debugger:native -d:useMalloc, OpenAI file benchmark, 5 runs): - Before: 0.196674934, 0.189423191, 0.198763300, 0.197125584, 0.205015032 - After: 0.182827130, 0.183330852, 0.174878542, 0.174360811, 0.181704921 - Median before: 0.197125584s - Median after: 0.181704921s - Improvement: 7.82% faster Callgrind comparison (same build flags): - Total Ir before: 3,219,477,120 - Total Ir after: 2,449,556,167 - Total Ir reduction: 23.91% parseString hotspot: - Before: 1,343,343,723 Ir - After: 573,423,735 Ir - Reduction: 57.31% (cherry picked from commit f4dd00c4cc)
2026-07-15 13:40:53 +00:00 · 2026-05-28 00:31:39 +03:00
parent d6f60ceb61
commit c17a355923
1 changed files with 31 additions and 2 deletions
--- a/lib/pure/parsejson.nim
+++ b/lib/pure/parsejson.nim
@@ -175,23 +175,48 @@ proc parseEscapedUTF16*(buf: cstring, pos: var int): int =
    else:
      return -1

+proc addSpan(dst: var string; src: string; startPos, endPos: int) {.inline.} =
+  let n = endPos - startPos
+  if n <= 0:
+    return
+
+  let old = dst.len
+  dst.setLen old + n
+
+  template impl =
+    for i in 0..<n:
+      dst[old + i] = src[startPos + i]
+
+  when nimvm:
+    impl
+  else:
+    when defined(js) or defined(nimscript):
+      impl
+    else:
+      {.noSideEffect.}:
+        copyMem dst[old].addr, src[startPos].unsafeAddr, n
+
 proc parseString(my: var JsonParser): TokKind =
  result = tkString
  var pos = my.bufpos + 1
+  var spanStart = pos
  if my.rawStringLiterals:
    add(my.a, '"')
  while true:
    case my.buf[pos]
    of '\0':
-      my.err = errQuoteExpected
+      my.err = errInvalidToken
+      addSpan(my.a, my.buf, spanStart, pos)
      result = tkError
      break
    of '"':
+      addSpan(my.a, my.buf, spanStart, pos)
      if my.rawStringLiterals:
        add(my.a, '"')
      inc(pos)
      break
    of '\\':
+      addSpan(my.a, my.buf, spanStart, pos)
      if my.rawStringLiterals:
        add(my.a, '\\')
      case my.buf[pos+1]
@@ -251,14 +276,18 @@ proc parseString(my: var JsonParser): TokKind =
        # don't bother with the error
        add(my.a, my.buf[pos])
        inc(pos)
+      spanStart = pos
    of '\c':
+      addSpan(my.a, my.buf, spanStart, pos)
      pos = lexbase.handleCR(my, pos)
      add(my.a, '\c')
+      spanStart = pos
    of '\L':
+      addSpan(my.a, my.buf, spanStart, pos)
      pos = lexbase.handleLF(my, pos)
      add(my.a, '\L')
+      spanStart = pos
    else:
-      add(my.a, my.buf[pos])
      inc(pos)
  my.bufpos = pos # store back