Refactor JS sourcemap generator (#21053)

* Parse the new line format * Fix pattern Didn't have space after the path * Remove duplicate sources Sources would sometimes double up with a new line at the end * Remove unused variable * Refactor sourcemap.nim Removes the multiple translations needed, now goes from single high level type to the final SourceMap Adds documentation for procs * Line numbers line up properly now Files aren't linking correctly though * Files now link up correctly Lines are sometimes off but overall seems pretty good Just need to implement parser * Add column info to output Add sourceMappingURL to rope directly to prevent copy * Properly handle columns * Remove debug lines * Add testcase * Finish testcase * Use the outdir folder instead of the folder the test is in to find the sourcemap Co-authored-by: ringabout <43030857+ringabout@users.noreply.github.com>
2026-02-12 06:18:51 +00:00 · 2022-12-13 15:21:51 +11:00
parent d84f64d2e7
commit 1fefb8e92a
3 changed files with 283 additions and 361 deletions
--- a/compiler/jsgen.nim
+++ b/compiler/jsgen.nim
@@ -724,8 +724,9 @@ proc hasFrameInfo(p: PProc): bool =
      ((p.prc == nil) or not (sfPure in p.prc.flags))

 proc lineDir(config: ConfigRef, info: TLineInfo, line: int): Rope =
-  ropes.`%`("/* line $2 \"$1\" */$n",
-         [rope(toFullPath(config, info)), rope(line)])
+  "/* line $2:$3 \"$1\" */$n" % [
+    rope(toFullPath(config, info)), rope(line), rope(info.toColumn)
+  ]

 proc genLineDir(p: PProc, n: PNode) =
  let line = toLinenumber(n.info)
@@ -2885,7 +2886,8 @@ proc myClose(graph: ModuleGraph; b: PPassContext, n: PNode): PNode =
    # Generate an optional source map.
    if optSourcemap in m.config.globalOptions:
      var map: SourceMap
-      (code, map) = genSourceMap($(code), outFile.string)
+      map = genSourceMap($code, outFile.string)
+      code &= "\n//# sourceMappingURL=$#.map" % [outFile.string]
      writeFile(outFile.string & ".map", $(%map))
    # Check if the generated JS code matches the output file, or else
    # write it to the file.
--- a/compiler/sourcemap.nim
+++ b/compiler/sourcemap.nim
@@ -1,383 +1,207 @@
-import os, strformat, strutils, tables, sets, ropes, json, algorithm
+import std/[strutils, strscans, parseutils, assertions]

 type
-  SourceNode* = ref object
-    line*:      int
-    column*:    int
-    source*:    string
-    name*:      string
-    children*:  seq[Child]
+  Segment = object
+    ## Segment refers to a block of something in the JS output.
+    ## This could be a token or an entire line
+    original: int # Column in the Nim source
+    generated: int # Column in the generated JS
+    name: int # Index into names list (-1 for no name)

-  C = enum cSourceNode, cSourceString
+  Mapping = object
+    ## Mapping refers to a line in the JS output.
+    ## It is made up of segments which refer to the tokens in the line
+    case inSource: bool # Whether the line in JS has a Nim equivalent
+    of true:
+      file: int # Index into files list
+      line: int # 0 indexed line of code in the Nim source
+      segments: seq[Segment]
+    else: discard

-  Child* = ref object
-    case kind*: C:
-    of cSourceNode:
-      node*:  SourceNode
-    of cSourceString:
-      s*:     string
+  SourceInfo = object
+    mappings: seq[Mapping]
+    names, files: seq[string]

-  SourceMap* = ref object
+  SourceMap* = object
    version*:   int
    sources*:   seq[string]
    names*:     seq[string]
    mappings*:  string
    file*:      string
-    # sourceRoot*: string
-    # sourcesContent*: string

-  SourceMapGenerator = ref object
-    file:           string
-    sourceRoot:     string
-    skipValidation: bool
-    sources:        seq[string]
-    names:          seq[string]
-    mappings:       seq[Mapping]
+func addSegment(info: var SourceInfo, original, generated: int, name: string = "") {.raises: [].} =
+  ## Adds a new segment into the current line
+  assert info.mappings.len > 0, "No lines have been added yet"
+  var segment = Segment(original: original, generated: generated, name: -1)
+  if name != "":
+    # Make name be index into names list
+    segment.name = info.names.find(name)
+    if segment.name == -1:
+      segment.name = info.names.len
+      info.names &= name

-  Mapping* = ref object
-    source*:        string
-    original*:      tuple[line: int, column: int]
-    generated*:     tuple[line: int, column: int]
-    name*:          string
-    noSource*:      bool
-    noName*:        bool
+  assert info.mappings[^1].inSource, "Current line isn't in Nim source"
+  info.mappings[^1].segments &= segment

+func newLine(info: var SourceInfo) {.raises: [].} =
+  ## Appends a mapping for a JS line that has no counterpart in the Nim source
+  info.mappings.add Mapping(inSource: false)

-proc child*(s: string): Child =
-  Child(kind: cSourceString, s: s)
-
-
-proc child*(node: SourceNode): Child =
-  Child(kind: cSourceNode, node: node)
-
-
-proc newSourceNode(line: int, column: int, path: string, node: SourceNode, name: string = ""): SourceNode =
-  SourceNode(line: line, column: column, source: path, name: name, children: @[child(node)])
-
-
-proc newSourceNode(line: int, column: int, path: string, s: string, name: string = ""): SourceNode =
-  SourceNode(line: line, column: column, source: path, name: name, children: @[child(s)])
-
-
-proc newSourceNode(line: int, column: int, path: string, children: seq[Child], name: string = ""): SourceNode =
-  SourceNode(line: line, column: column, source: path, name: name, children: children)
-
-
-
-
-# debugging
-
-
-proc text*(sourceNode: SourceNode, depth: int): string =
-  let empty = "  "
-  result = &"{repeat(empty, depth)}SourceNode({sourceNode.source}:{sourceNode.line}:{sourceNode.column}):\n"
-  for child in sourceNode.children:
-    if child.kind == cSourceString:
-      result.add(&"{repeat(empty, depth + 1)}{child.s}\n")
-    else:
-      result.add(child.node.text(depth + 1))
-
-
-proc `$`*(sourceNode: SourceNode): string = text(sourceNode, 0)
+func newLine(info: var SourceInfo, file: string, line: int) {.raises: [].} =
+  ## Opens a new mapping for a JS line that originates from `line` of `file`.
+  ## Follow up with addSegment to fill the line with segments
+  var fileIdx = info.files.find(file)
+  if fileIdx == -1:
+    # First time this file is seen, so register it in the files list
+    fileIdx = info.files.len
+    info.files.add file
+  info.mappings.add Mapping(inSource: true, file: fileIdx, line: line)


 # base64_VLQ
+func encode*(values: seq[int]): string {.raises: [].} =
+  ## Serialises `values` one after another as base64 VLQ numbers and
+  ## returns the concatenated digits
+  # References:
+  #   - https://www.lucidchart.com/techblog/2019/08/22/decode-encoding-base64-vlqs-source-maps/
+  #   - https://github.com/rails/sprockets/blob/main/guides/source_maps.md#source-map-file
+  const
+    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+    bitsPerDigit = 5
+    continueBit = 1 shl bitsPerDigit
+    payloadMask = continueBit - 1
+  for val in values:
+    # The lowest bit carries the sign, the magnitude sits above it
+    var rest = abs(val) shl 1
+    if val < 0:
+      rest = rest or 1
+    # Emit the number five bits at a time, least significant group first.
+    # This is what allows arbitrarily large numbers to be stored
+    while true:
+      let payload = rest and payloadMask
+      rest = rest shr bitsPerDigit
+      # A set continue bit tells the decoder that more digits follow
+      result.add alphabet[if rest > 0: payload or continueBit else: payload]
+      # Nothing left to encode for this number
+      if rest == 0:
+        break

+iterator tokenize*(line: string): (int, string) =
+  ## Goes through a line and splits it into Nim identifiers and
+  ## normal JS code. This allows us to map mangled names back to Nim names.
+  ## Yields (column, name) where column is the index in `line` at which the
+  ## identifier starts. Doesn't yield anything but identifiers.
+  ## See mangleName in compiler/jsgen.nim for how name mangling is done
+  var col = 0
+  while col < line.len:
+    var
+      token: string
+      name: string
+    # First we find the next identifier
+    col += line.skipWhitespace(col)
+    col += line.skipUntil(IdentStartChars, col)
+    let identStart = col
+    col += line.parseIdent(token, col)
+    # Idents will either be originalName_randomInt or HEXhexCode_randomInt.
+    # Only treat the token as an encoded character when "HEX" really is
+    # followed by two hex digits: shorter tokens (e.g. a plain `HEX`) used to
+    # raise an index error and names like `HEXAGON_1` were decoded as garbage
+    let isHexEncoded = token.len >= 5 and token.startsWith("HEX") and
+      token[3] in HexDigits and token[4] in HexDigits
+    if isHexEncoded:
+      var hex: int
+      # 3 = "HEX".len and we only want to parse the two digits after it
+      discard token[3 ..< 5].parseHex(hex)
+      name = $chr(hex)
+    elif not token.endsWith("_Idx"): # Ignore address indexes
+      # It might be in the form originalName_randomInt
+      let lastUnderscore = token.rfind('_')
+      if lastUnderscore != -1:
+        name = token[0..<lastUnderscore]
+    if name != "":
+      yield (identStart, name)

-let integers = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
-
-
-proc encode*(i: int): string =
-  result = ""
-  var n = i
-  if n < 0:
-    n = (-n shl 1) or 1
-  else:
-    n = n shl 1
-
-  var z = 0
-  while z == 0 or n > 0:
-    var e = n and 31
-    n = n shr 5
-    if n > 0:
-      e = e or 32
-
-    result.add(integers[e])
-    z += 1
-
-
-type TokenState = enum Normal, String, Ident, Mangled
-
-iterator tokenize*(line: string): (bool, string) =
-  # result = @[]
-  var state = Normal
-  var token = ""
-  var isMangled = false
-  for z, ch in line:
-    if ch.isAlphaAscii:
-      if state == Normal:
-        state = Ident
-        if token.len > 0:
-          yield (isMangled, token)
-        token = $ch
-        isMangled = false
-      else:
-        token.add(ch)
-    elif ch == '_':
-      if state == Ident:
-        state = Mangled
-        isMangled = true
-      token.add($ch)
-    elif ch != '"' and not ch.isAlphaNumeric:
-      if state in {Ident, Mangled}:
-        state = Normal
-        if token.len > 0:
-          yield (isMangled, token)
-        token = $ch
-        isMangled = false
-      else:
-        token.add($ch)
-    elif ch == '"':
-      if state != String:
-        state = String
-        if token.len > 0:
-          yield (isMangled, token)
-        token = $ch
-        isMangled = false
-      else:
-        state = Normal
-        token.add($ch)
-        if token.len > 0:
-          yield (isMangled, token)
-        isMangled = false
-        token = ""
+func parse*(source: string): SourceInfo =
+  ## Parses the JS output for embedded line info
+  ## So it can convert those into a series of mappings
+  var
+    skipFirstLine = true
+    currColumn = 0
+    currLine = 0
+    currFile = ""
+  # Add each line as a node into the output
+  for line in source.splitLines():
+    var
+      lineNumber: int
+      linePath: string
+      column: int
+    if line.strip().scanf("/* line $i:$i \"$+\" */", lineNumber, column, linePath):
+      # When we reach the first line mapping then we can assume
+      # we can map the rest of the JS lines to Nim lines
+      currColumn = column # Column is already zero indexed
+      currLine = lineNumber - 1
+      currFile = linePath
+      # Lines are zero indexed
+      result.newLine(currFile, currLine)
+      # Skip whitespace to find the starting column
+      result.addSegment(currColumn, line.skipWhitespace())
+    elif currFile != "":
+      result.newLine(currFile, currLine)
+      # There mightn't be any tokens so add a starting segment
+      result.addSegment(currColumn, line.skipWhitespace())
+      for jsColumn, token in line.tokenize:
+        result.addSegment(currColumn, jsColumn, token)
    else:
-      token.add($ch)
-  if token.len > 0:
-    yield (isMangled, token)
+      result.newLine()

-proc parse*(source: string, path: string): SourceNode =
-  let lines = source.splitLines()
-  var lastLocation: SourceNode = nil
-  result = newSourceNode(0, 0, path, @[])
-    
-  # we just use one single parent and add all nim lines
-  # as its children, I guess in typical codegen
-  # that happens recursively on ast level
-  # we also don't have column info, but I doubt more one nim lines can compile to one js
-  # maybe in macros?
+func toSourceMap*(info: SourceInfo, file: string): SourceMap {.raises: [].} =
+  ## Convert from high level SourceInfo into the required SourceMap object
+  # Add basic info
+  result.version = 3
+  result.file = file
+  result.sources = info.files
+  result.names = info.names
+  # Convert nodes into mappings.
+  # Mappings are split into blocks where each block refers to a line in the outputted JS.
+  # Blocks can be separated into statements which refer to tokens on the line.
+  # Since the mappings depend on previous values we need to
+  # keep track of previous file, name, etc
+  var
+    prevFile = 0
+    prevLine = 0
+    prevName = 0
+    prevNimCol = 0

-  for i, originalLine in lines:
-    let line = originalLine.strip
-    if line.len == 0:
-      continue
-      
-    # this shouldn't be a problem:
-    # jsgen doesn't generate comments
-    # and if you emit // line you probably know what you're doing
-    if line.startsWith("// line"):
-      if result.children.len > 0:
-        result.children[^1].node.children.add(child(line & "\n"))
-      let pos = line.find(" ", 8)
-      let lineNumber = line[8 .. pos - 1].parseInt
-      let linePath = line[pos + 2 .. ^2] # quotes
-      
-      lastLocation = newSourceNode(
-        lineNumber,
-        0,
-        linePath,
-        @[])
-      result.children.add(child(lastLocation))
-    else:
-      var last: SourceNode
-      for token in line.tokenize():
-        var name = ""
-        if token[0]:
-          name = token[1].split('_', 1)[0]
-        
-        
-        if result.children.len > 0:
-          result.children[^1].node.children.add(
-            child(
-              newSourceNode(
-                result.children[^1].node.line,
-                0,
-                result.children[^1].node.source,
-                token[1],
-                name)))
-          last = result.children[^1].node.children[^1].node
-        else:
-          result.children.add(
-            child(
-              newSourceNode(i + 1, 0, path, token[1], name)))
-          last = result.children[^1].node
-      let nl = "\n"
-      if not last.isNil:
-        last.source.add(nl)
+  for mapping in info.mappings:
+    # We now need to encode segments with the following fields
+    # All these fields are relative to their previous values
+    # - 0: Column in generated code
+    # - 1: Index of Nim file in source list
+    # - 2: Line in Nim source
+    # - 3: Column in Nim source
+    # - 4: Index in names list
+    if mapping.inSource:
+      # JS Column is special in that it is reset after every line
+      var prevJSCol = 0
+      for segment in mapping.segments:
+        var values = @[segment.generated - prevJSCol, mapping.file - prevFile, mapping.line - prevLine, segment.original - prevNimCol]
+        # Add name field if needed
+        if segment.name != -1:
+          values &= segment.name - prevName
+          prevName = segment.name
+        prevJSCol = segment.generated
+        prevNimCol = segment.original
+        prevFile = mapping.file
+        prevLine = mapping.line
+        result.mappings &= encode(values) & ","
+      # Remove trailing ,
+      if mapping.segments.len > 0:
+        result.mappings.setLen(result.mappings.len - 1)

-proc cmp(a: Mapping, b: Mapping): int =
-  var c = cmp(a.generated, b.generated)
-  if c != 0:
-    return c
+    result.mappings &= ";"

-  c = cmp(a.source, b.source)
-  if c != 0:
-    return c
-
-  c = cmp(a.original, b.original)
-  if c != 0:
-    return c
-
-  return cmp(a.name, b.name)
-
-
-proc index*[T](elements: seq[T], element: T): int =
-  for z in 0 ..< elements.len:
-    if elements[z] == element:
-      return z
-  return -1
-
-
-proc serializeMappings(map: SourceMapGenerator, mappings: seq[Mapping]): string =
-  var previous = Mapping(generated: (line: 1, column: 0), original: (line: 0, column: 0), name: "", source: "")
-  var previousSourceId = 0
-  var previousNameId = 0
-  var next = ""
-  var nameId = 0
-  var sourceId = 0
-  result = ""
-
-  for z, mapping in mappings:
-    next = ""
-
-    if mapping.generated.line != previous.generated.line:
-      previous.generated.column = 0
-
-      while mapping.generated.line != previous.generated.line:
-        next.add(";")
-        previous.generated.line += 1
-
-    else:
-      if z > 0:
-        if cmp(mapping, mappings[z - 1]) == 0:
-          continue
-        next.add(",")
-
-    next.add(encode(mapping.generated.column - previous.generated.column))
-    previous.generated.column = mapping.generated.column
-
-    if not mapping.noSource and mapping.source.len > 0:
-      sourceId = map.sources.index(mapping.source)
-      next.add(encode(sourceId - previousSourceId))
-      previousSourceId = sourceId
-      next.add(encode(mapping.original.line - 1 - previous.original.line))
-      previous.original.line = mapping.original.line - 1
-      next.add(encode(mapping.original.column - previous.original.column))
-      previous.original.column = mapping.original.column
-
-      if not mapping.noName and mapping.name.len > 0:
-        nameId = map.names.index(mapping.name)
-        next.add(encode(nameId - previousNameId))
-        previousNameId = nameId
-
-    result.add(next)
-
-
-proc gen*(map: SourceMapGenerator): SourceMap =
-  var mappings = map.mappings.sorted do (a: Mapping, b: Mapping) -> int:
-    cmp(a, b)
-  result = SourceMap(
-    file: map.file,
-    version: 3,
-    sources: map.sources[0..^1],
-    names: map.names[0..^1],
-    mappings: map.serializeMappings(mappings))
-
-
-
-proc addMapping*(map: SourceMapGenerator, mapping: Mapping) =
-  if not mapping.noSource and mapping.source notin map.sources:
-    map.sources.add(mapping.source)
-
-  if not mapping.noName and mapping.name.len > 0 and mapping.name notin map.names:
-    map.names.add(mapping.name)
-
-  # echo "map ", mapping.source, " ", mapping.original, " ", mapping.generated, " ", mapping.name
-  map.mappings.add(mapping)
-
-
-proc walk*(node: SourceNode, fn: proc(line: string, original: SourceNode)) =
-  for child in node.children:
-    if child.kind == cSourceString and child.s.len > 0:
-      fn(child.s, node)
-    else:
-      child.node.walk(fn)
-
-
-proc toSourceMap*(node: SourceNode, file: string): SourceMapGenerator =
-  var map = SourceMapGenerator(file: file, sources: @[], names: @[], mappings: @[])
-
-  var generated = (line: 1, column: 0)
-  var sourceMappingActive = false
-  var lastOriginal = SourceNode(source: "", line: -1, column: 0, name: "", children: @[])
-
-  node.walk do (line: string, original: SourceNode):
-    if original.source.endsWith(".js"):
-      # ignore it
-      discard
-    else:
-      if original.line != -1:
-        if lastOriginal.source != original.source or
-           lastOriginal.line != original.line or
-           lastOriginal.column != original.column or
-           lastOriginal.name != original.name:
-          map.addMapping(
-            Mapping(
-              source: original.source,
-              original: (line: original.line, column: original.column),
-              generated: (line: generated.line, column: generated.column),
-              name: original.name))
-
-        lastOriginal = SourceNode(
-          source: original.source,
-          line: original.line,
-          column: original.column,
-          name: original.name,
-          children: lastOriginal.children)
-        sourceMappingActive = true
-      elif sourceMappingActive:
-        map.addMapping(
-          Mapping(
-            noSource: true,
-            noName: true,
-            generated: (line: generated.line, column: generated.column),
-            original: (line: -1, column: -1)))
-        lastOriginal.line = -1
-        sourceMappingActive = false
-
-    for z in 0 ..< line.len:
-      if line[z] in Newlines:
-        generated.line += 1
-        generated.column = 0
-
-        if z == line.len - 1:
-          lastOriginal.line = -1
-          sourceMappingActive = false
-        elif sourceMappingActive:
-          map.addMapping(
-            Mapping(
-              source: original.source,
-              original: (line: original.line, column: original.column),
-              generated: (line: generated.line, column: generated.column),
-              name: original.name))
-      else:
-        generated.column += 1
-    
-  map
-
-
-proc genSourceMap*(source: string, outFile: string): (Rope, SourceMap) =
-  let node = parse(source, outFile)
-  let map = node.toSourceMap(file = outFile)
-  ((&"{source}\n//# sourceMappingURL={outFile}.map").rope, map.gen)
+proc genSourceMap*(source: string, outFile: string): SourceMap =
+  ## Builds the source map for the generated JS held in `source`.
+  ## `outFile` is stored as the `file` field of the resulting map
+  parse(source).toSourceMap(outFile)

--- a/tests/js/tsourcemap.nim
+++ b/tests/js/tsourcemap.nim
@@ -0,0 +1,96 @@
+discard """
+  action: "run"
+  target: "js"
+  cmd: "nim js -r -d:nodejs $options --sourceMap:on $file"
+"""
+import std/[os, json, strutils, sequtils, algorithm, assertions, paths, compilesettings]
+
+# Implements a very basic sourcemap parser and then runs it on itself.
+# Allows to check for basic problems such as bad counts and lines missing (e.g. issue #21052)
+
+type
+  # Shape the parsed `.map` JSON is converted into (see the `.to(SourceMap)` call below)
+  SourceMap = object
+    version:   int
+    sources:   seq[string]
+    names:     seq[string]
+    mappings:  string
+    file:      string
+
+  # One decoded mapping entry: accumulated line/column plus the source it points into
+  Line = object
+    line, column: int
+    file: string
+
+const
+  flag = 1 shl 5 # Continuation bit of a base64 VLQ digit
+  mask = (1 shl 5) - 1 # Lower five bits of a digit hold the payload
+  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
+
+# Reverse lookup: character code -> position in `alphabet`.
+# Characters outside the alphabet keep the default of 0
+var b64Table: seq[int] = 0.repeat(max(alphabet.mapIt(it.ord)) + 1)
+for i, b in alphabet.pairs:
+  b64Table[b.ord] = i
+
+# From https://github.com/juancarlospaco/nodejs/blob/main/src/nodejs/jsfs.nim
+# Emits the `require("fs")` statement so that `fs` is available to the binding below
+proc importFs*() {.importjs: "var fs = require(\"fs\");".}
+# Binding for the `fs` call of the same name; the result is converted with `toString()`
+proc readFileSync*(path: cstring): cstring {.importjs: "(fs.$1(#).toString())".}
+importFS()
+# Read in needed files
+# Both files are looked up in the compiler's output directory (`querySetting(outDir)`)
+let
+  jsFileName = string(querySetting(outDir).Path / "tsourcemap.js".Path)
+  mapFileName = jsFileName & ".map"
+
+  data = parseJson($mapFileName.cstring.readFileSync()).to(SourceMap)
+  jsFile = $readFileSync(jsFileName.cstring)
+
+proc decodeVLQ(inp: string): seq[int] =
+  ## Decodes a run of base64 VLQ digits into the integers they encode
+  var
+    shift = 0
+    value = 0
+  for digit in inp.mapIt(b64Table[it.ord]):
+    # Each digit contributes five payload bits, least significant group first
+    value += (digit and mask) shl shift
+    if (digit and flag) != 0:
+      # Continuation bit is set: the number carries on in the next digit
+      shift += 5
+    else:
+      # Number is complete. Lowest bit is the sign, the rest is the magnitude
+      let magnitude = value shr 1
+      result.add(if (value and 1) != 0: -magnitude else: magnitude)
+      shift = 0
+      value = 0
+
+
+# Keep track of state
+# Everything except the JS column is a running total over the whole map
+var
+  line = 0
+  source = 0
+  name = 0
+  column = 0
+  lines: seq[Line]
+
+for gline in data.mappings.split(';'):
+  # The JS column is relative to the start of each generated line
+  var jsColumn = 0
+  for item in gline.strip().split(','):
+    let value = item.decodeVLQ()
+    doAssert value.len in [0, 1, 4, 5]
+    if value.len == 0:
+      continue
+    jsColumn += value[0]
+    if value.len >= 4:
+      source += value[1]
+      line += value[2]
+      column += value[3]
+      lines &= Line(line: line, column: column, file: data.sources[source])
+    if value.len == 5:
+      # The fifth field is the (relative) index into the names list
+      name += value[4]
+      doAssert name in 0..<data.names.len, "Name index is out of bounds"
+
+# Every line in the JS needs its own mapping group, otherwise the JS lines
+# won't match up with the Nim lines. The final line is exempt since it
+# doesn't need to line up.
+let jsLines = jsFile.splitLines().len
+doAssert data.mappings.count(';') == jsLines - 1
+
+# This very file has to show up somewhere in the source map
+var foundSelf = false
+for entry in lines:
+  if entry.file.contains("tsourcemap.nim"):
+    foundSelf = true
+    doAssert entry.line in 0..<jsLines, "Lines is out of bounds for file"
+doAssert foundSelf, "Couldn't find tsourcemap.nim in source map"