nimpretty: keep a seq of tokens instead of a single string to enable better transformations [refactoring]

2026-02-15 15:44:14 +00:00 · 2019-06-09 22:22:36 +02:00
parent 8317de7648
commit cb47e49d3b
1 changed file with 90 additions and 72 deletions
--- a/compiler/layouter.nim
+++ b/compiler/layouter.nim
@@ -12,6 +12,7 @@
 import idents, lexer, lineinfos, llstream, options, msgs, strutils,
  pathutils
 from os import changeFileExt
+from sequtils import delete

 const
  MaxLineLen = 80
@@ -37,8 +38,8 @@ type
    col, lastLineNumber, lineSpan, indentLevel, indWidth*: int
    keepIndents*: int
    doIndentMore*: int
-    content: string
    kinds: seq[LayoutToken]
+    tokens: seq[string]
    indentStack: seq[int]
    fixedUntil: int # marks where we must not go in the content
    altSplitPos: array[SplitKind, int] # alternative split positions
@@ -55,21 +56,25 @@ proc openEmitter*(em: var Emitter, cache: IdentCache;
  em.lastTok = tkInvalid
  em.inquote = false
  em.col = 0
-  em.content = newStringOfCap(16_000)
  em.indentStack = newSeqOfCap[int](30)
  em.indentStack.add 0
  em.lastLineNumber = 1

 proc closeEmitter*(em: var Emitter) =
  let outFile = em.config.absOutFile
-  if fileExists(outFile) and readFile(outFile.string) == em.content:
+
+  var content = newStringOfCap(16_000) # output text, rebuilt from the tokens
+  for i in 0..em.tokens.high:
+    content.add em.tokens[i] # tokens are concatenated in emission order
+
+  if fileExists(outFile) and readFile(outFile.string) == content:
    discard "do nothing, see #9499"
    return
  var f = llStreamOpen(outFile, fmWrite)
  if f == nil:
    rawMessage(em.config, errGenerated, "cannot open file: " & outFile.string)
    return
-  f.llStreamWrite em.content
+  f.llStreamWrite content # the assembled text is written in a single call
  llStreamClose(f)

 proc countNewlines(s: string): int =
@@ -85,27 +90,29 @@ proc calcCol(em: var Emitter; s: string) =
    inc em.col

 proc wr(em: var Emitter; x: string; lt: LayoutToken) =
-  em.content.add x
+  em.tokens.add x # one seq entry per emitted token instead of a flat string
  em.kinds.add lt
  inc em.col, x.len
+  assert em.tokens.len == em.kinds.len # tokens and kinds must stay parallel

-when false:
-  proc wrNewline(em: var Emitter) =
-    em.tokens.add "\L"
-    em.kinds.add ltNewline
-    inc em.tokensSum, 1
-    em.col = 0
+proc wrNewline(em: var Emitter) =
+  em.tokens.add "\L" # appended directly, not via wr, so col is not advanced
+  em.kinds.add ltNewline
+  em.col = 0 # the next token starts a fresh line

-  proc wrSpaces(em: var Emitter; spaces: Natural) =
+proc wrSpaces(em: var Emitter; spaces: int) =
+  if spaces > 0: # zero or negative widths emit no token at all
    wr(em, strutils.repeat(' ', spaces), ltSpaces)

-  proc removeSpaces(em: var Emitter) =
-    while em.kinds.len > 0 and em.kinds[^1] == ltSpaces:
-      let tokenLen = em.tokens[^1].len
-      setLen(em.tokens, em.tokens.len-1)
-      setLen(em.kinds, em.kinds.len-1)
-      dec em.tokensSum, tokenLen
-      dec em.col, tokenLen
+proc wrSpace(em: var Emitter) =
+  wr(em, " ", ltSpaces) # exactly one blank, emitted as its own token
+
+proc removeSpaces(em: var Emitter) =
+  while em.kinds.len > 0 and em.kinds[^1] == ltSpaces: # pop trailing blanks
+    let tokenLen = em.tokens[^1].len
+    setLen(em.tokens, em.tokens.len-1)
+    setLen(em.kinds, em.kinds.len-1) # shrink both seqs together
+    dec em.col, tokenLen # give back the columns the removed token occupied

 template goodCol(col): bool = col in 40..MaxLineLen

@@ -123,7 +130,7 @@ const

 template rememberSplit(kind) =
  if goodCol(em.col):
-    em.altSplitPos[kind] = em.content.len
+    em.altSplitPos[kind] = em.tokens.len # a token index, not a byte offset

 template moreIndent(em): int =
  (if em.doIndentMore > 0: em.indWidth*2 else: em.indWidth)
@@ -137,22 +144,31 @@ proc softLinebreak(em: var Emitter, lit: string) =
      # bug #10295, check first if even more indentation would help:
      let spaces = em.indentLevel+moreIndent(em)
      if spaces < em.col:
-        while em.content.len > 0 and em.content[em.content.high] == ' ':
-          setLen(em.content, em.content.len-1)
-        wr(em, "\L", ltNewline)
+        removeSpaces em
+        wrNewline(em)
        em.col = 0
-        for i in 1..spaces: wr(em, " ", ltSpaces)
+        wrSpaces em, spaces
    else:
      # search backwards for a good split position:
      for a in mitems(em.altSplitPos):
        if a > em.fixedUntil:
          var spaces = 0
-          while a+spaces < em.content.len and em.content[a+spaces] == ' ':
+          while a+spaces < em.kinds.len and em.kinds[a+spaces] == ltSpaces:
            inc spaces
-          if spaces > 0: delete(em.content, a, a+spaces-1)
-          em.col = em.content.len - a
-          let ws = "\L" & repeat(' ', em.indentLevel+moreIndent(em))
-          em.content.insert(ws, a)
+          if spaces > 0:
+            delete(em.tokens, a, a+spaces-1)
+            delete(em.kinds, a, a+spaces-1)
+          em.kinds.insert(ltNewline, a)
+          em.tokens.insert("\L", a)
+          em.kinds.insert(ltSpaces, a+1)
+          em.tokens.insert(repeat(' ', em.indentLevel+moreIndent(em)), a+1)
+          # recompute em.col:
+          var i = em.kinds.len-1
+          em.col = 0
+          while i >= 0 and em.kinds[i] != ltNewline:
+            inc em.col, em.tokens[i].len
+            dec i
+          # mark position as "already split here"
          a = -1
          break

@@ -168,7 +184,7 @@ proc emitMultilineComment(em: var Emitter, lit: string, col: int) =
    if i == 0:
      discard
    elif stripped.len == 0:
-      wr em, "\L", ltNewline
+      wrNewline em
    else:
      if a > lastIndent:
        b += em.indWidth
@@ -176,17 +192,17 @@ proc emitMultilineComment(em: var Emitter, lit: string, col: int) =
      elif a < lastIndent:
        b -= em.indWidth
        lastIndent = a
-      wr em, "\L", ltNewline
-      for i in 1 .. col + b: wr(em, " ", ltSpaces)
+      wrNewline em
+      wrSpaces em, col + b
    wr em, stripped, ltComment
    inc i

 proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =

  template endsInWhite(em): bool =
-    em.content.len == 0 or em.content[em.content.high] in {' ', '\L'}
+    em.kinds.len == 0 or em.kinds[em.kinds.high] in {ltSpaces, ltNewline}
  template endsInAlpha(em): bool =
-    em.content.len > 0 and em.content[em.content.high] in SymChars+{'_'}
+    em.tokens.len > 0 and em.tokens[em.tokens.high][^1] in SymChars+{'_'}

  template wasExportMarker(em): bool =
    em.kinds.len > 0 and em.kinds[^1] == ltExportMarker
@@ -197,9 +213,9 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
    em.lineSpan = countNewlines(lit)
    if em.lineSpan > 0: calcCol(em, lit)
    if not endsInWhite(em):
-      wr(em, " ", ltSpaces)
+      wrSpace em
      if em.lineSpan == 0 and max(em.col, LineCommentColumn) + lit.len <= MaxLineLen:
-        for i in 1 .. LineCommentColumn - em.col: wr(em, " ", ltSpaces)
+        wrSpaces em, LineCommentColumn - em.col
    if em.lineSpan == 0:
      wr em, lit, ltComment
    else:
@@ -209,13 +225,12 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
    case tok.literal
    of "#!nimpretty off":
      inc em.keepIndents
-      wr(em, "\L", ltNewline)
+      wrNewline em
      em.lastLineNumber = tok.line + 1
    of "#!nimpretty on":
      dec em.keepIndents
      em.lastLineNumber = tok.line
-    wr(em, "\L", ltNewline)
-    #for i in 1 .. tok.indent: wr " "
+    wrNewline em
    wr em, tok.literal, ltComment
    em.col = 0
    em.lineSpan = 0
@@ -226,7 +241,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
    # we have an inline comment so handle it before the indentation token:
    emitComment(em, tok)
    preventComment = true
-    em.fixedUntil = em.content.high
+    em.fixedUntil = em.tokens.high

  elif tok.indent >= 0:
    if em.keepIndents > 0:
@@ -253,24 +268,21 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
       is not touched.
    ]#
    # remove trailing whitespace:
-    while em.content.len > 0 and em.content[em.content.high] == ' ':
-      setLen(em.content, em.content.len-1)
-    wr(em, "\L", ltNewline)
-    for i in 2..tok.line - em.lastLineNumber: wr(em, "\L", ltNewline)
-    em.col = 0
-    for i in 1..em.indentLevel:
-      wr(em, " ", ltSpaces)
-    em.fixedUntil = em.content.high
+    removeSpaces em
+    wrNewline em
+    for i in 2..tok.line - em.lastLineNumber: wrNewline(em)
+    wrSpaces em, em.indentLevel
+    em.fixedUntil = em.tokens.high

  var lastTokWasTerse = false
  case tok.tokType
  of tokKeywordLow..tokKeywordHigh:
    if endsInAlpha(em):
-      wr(em, " ", ltSpaces)
+      wrSpace em
    elif not em.inquote and not endsInWhite(em) and
        em.lastTok notin openPars and not em.lastTokWasTerse:
      #and tok.tokType in oprSet
-      wr(em, " ", ltSpaces)
+      wrSpace em

    if not em.inquote:
      wr(em, TokTypeToStr[tok.tokType], ltKeyword)
@@ -280,7 +292,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
      of tkOr: rememberSplit(splitOr)
      of tkIn, tkNotin:
        rememberSplit(splitIn)
-        wr(em, " ", ltSpaces)
+        wrSpace em
      else: discard
    else:
      # keywords in backticks are not normalized:
@@ -288,15 +300,15 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =

  of tkColon:
    wr(em, TokTypeToStr[tok.tokType], ltOther)
-    wr(em, " ", ltSpaces)
+    wrSpace em
  of tkSemicolon, tkComma:
    wr(em, TokTypeToStr[tok.tokType], ltOther)
    rememberSplit(splitComma)
-    wr(em, " ", ltSpaces)
+    wrSpace em
  of tkParDotLe, tkParLe, tkBracketDotLe, tkBracketLe,
     tkCurlyLe, tkCurlyDotLe, tkBracketLeColon:
    if tok.strongSpaceA > 0 and not em.endsInWhite and not em.wasExportMarker:
-      wr(em, " ", ltSpaces)
+      wrSpace em
    wr(em, TokTypeToStr[tok.tokType], ltOther)
    rememberSplit(splitParLe)
  of tkParRi,
@@ -310,9 +322,9 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
    lastTokWasTerse = true
    wr(em, TokTypeToStr[tok.tokType], ltOther)
  of tkEquals:
-    if not em.inquote and not em.endsInWhite: wr(em, " ", ltSpaces)
+    if not em.inquote and not em.endsInWhite: wrSpace(em)
    wr(em, TokTypeToStr[tok.tokType], ltOther)
-    if not em.inquote: wr(em, " ", ltSpaces)
+    if not em.inquote: wrSpace(em)
  of tkOpr, tkDotDot:
    if ((tok.strongSpaceA == 0 and tok.strongSpaceB == 0) or em.inquote) and
      tok.ident.s notin ["<", ">", "<=", ">=", "==", "!="]:
@@ -321,16 +333,16 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
      # if not surrounded by whitespace, don't produce any whitespace either:
      wr(em, tok.ident.s, ltOpr)
    else:
-      if not em.endsInWhite: wr(em, " ", ltSpaces)
+      if not em.endsInWhite: wrSpace(em)
      wr(em, tok.ident.s, ltOpr)
      template isUnary(tok): bool =
        tok.strongSpaceB == 0 and tok.strongSpaceA > 0

      if not isUnary(tok):
        rememberSplit(splitBinary)
-        wr(em, " ", ltSpaces)
+        wrSpace(em)
  of tkAccent:
-    if not em.inquote and endsInAlpha(em): wr(em, " ", ltSpaces)
+    if not em.inquote and endsInAlpha(em): wrSpace(em)
    wr(em, TokTypeToStr[tok.tokType], ltOther)
    em.inquote = not em.inquote
  of tkComment:
@@ -339,7 +351,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
  of tkIntLit..tkStrLit, tkRStrLit, tkTripleStrLit, tkGStrLit, tkGTripleStrLit, tkCharLit:
    let lit = fileSection(em.config, em.fid, tok.offsetA, tok.offsetB)
    softLinebreak(em, lit)
-    if endsInAlpha(em) and tok.tokType notin {tkGStrLit, tkGTripleStrLit}: wr(em, " ", ltSpaces)
+    if endsInAlpha(em) and tok.tokType notin {tkGStrLit, tkGTripleStrLit}: wrSpace(em)
    em.lineSpan = countNewlines(lit)
    if em.lineSpan > 0: calcCol(em, lit)
    wr em, lit, ltLit
@@ -347,7 +359,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
  else:
    let lit = if tok.ident != nil: tok.ident.s else: tok.literal
    softLinebreak(em, lit)
-    if endsInAlpha(em): wr(em, " ", ltSpaces)
+    if endsInAlpha(em): wrSpace(em)
    wr em, lit, ltIdent

  em.lastTok = tok.tokType
@@ -355,21 +367,27 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
  em.lastLineNumber = tok.line + em.lineSpan
  em.lineSpan = 0

+proc endsWith(em: Emitter; k: varargs[string]): bool =
+  if k.len > em.tokens.len: return false # fewer tokens emitted than asked for
+  for i, wanted in k: # compare the trailing k.len tokens in order
+    if wanted != em.tokens[em.tokens.len - k.len + i]: return false
+  result = true
+
 proc starWasExportMarker*(em: var Emitter) =
-  em.kinds[^1] = ltExportMarker
-  if em.content.endsWith(" * "):
-    setLen(em.content, em.content.len-3)
-    em.content.add("*")
+  if em.endsWith("*"): em.kinds[^1] = ltExportMarker # bare `*`: retag only
+  if em.endsWith(" ", "*", " "): # spaced form: fold into one "*" token
+    setLen(em.tokens, em.tokens.len-2) # drop "*" and the blank after it
+    setLen(em.kinds, em.kinds.len-2) # keep kinds parallel to tokens
+    em.tokens[^1] = "*"; em.kinds[^1] = ltExportMarker # reuse the blank's slot
    dec em.col, 2

 proc commaWasSemicolon*(em: var Emitter) =
  if em.semicolons == detectSemicolonKind:
-    em.semicolons = if em.content.endsWith(", "): dontTouch else: useSemicolon
-  if em.semicolons == useSemicolon and em.content.endsWith(", "):
-    setLen(em.content, em.content.len-2)
-    em.content.add("; ")
+    em.semicolons = if em.endsWith(",", " "): dontTouch else: useSemicolon
+  if em.semicolons == useSemicolon and em.endsWith(",", " "):
+    em.tokens[em.tokens.len-2] = ";" # same width as ",", so em.col is unchanged

 proc curlyRiWasPragma*(em: var Emitter) =
-  if em.content.endsWith("}"):
-    setLen(em.content, em.content.len-1)
-    em.content.add(".}")
+  if em.endsWith("}"):
+    em.tokens[em.tokens.len-1] = ".}" # rewrite the last token in place
+    inc em.col # the token grew from one character to two