nimpretty: first version that seems useful for others to test it

2026-04-19 05:50:30 +00:00 · 2018-06-16 01:46:39 +02:00
parent bf5d619a52
commit a1bd4a6cbd
6 changed files with 239 additions and 6 deletions
--- a/compiler/layouter.nim
+++ b/compiler/layouter.nim
@@ -0,0 +1,201 @@
+#
+#
+#           The Nim Compiler
+#        (c) Copyright 2018 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Layouter for nimpretty. Still primitive but useful.
+## TODO
+## - Make indentations consistent.
+## - Align 'if' and 'case' expressions properly.
+
+import idents, lexer, lineinfos, llstream, options, msgs, strutils
+from os import changeFileExt
+
+const
+  MaxLineLen = 80 # line length at which `softLinebreak` starts to wrap
+  LineCommentColumn = 30 # column that `emitComment` pads short trailing comments to
+
+type
+  SplitKind = enum # kinds of remembered split positions; index type of `altSplitPos`
+    splitComma, splitParLe, splitAnd, splitOr, splitIn, splitBinary
+
+  Emitter* = object # state that `emitTok` updates for every token it is given
+    f: PLLStream # output stream; opened by `openEmitter`, closed by `closeEmitter`
+    config: ConfigRef # passed to `fileSection` together with `fid`
+    fid: FileIndex # index of the file being processed
+    lastTok: TTokType # type of the token handled by the previous `emitTok` call
+    inquote: bool # toggled by every tkAccent; no soft line breaks while set
+    col, lastLineNumber, lineSpan, indentLevel: int # column/line/indent tracking
+    content: string # the output text; written to `f` by `closeEmitter`
+    fixedUntil: int # marks where we must not go in the content
+    altSplitPos: array[SplitKind, int] # alternative split positions
+
+proc openEmitter*(em: var Emitter, config: ConfigRef, fileIdx: FileIndex) =
+  ## Resets `em` for `fileIdx` and opens "<full path of the file>.pretty.nim".
+  let target = config.toFullPath(fileIdx).changeFileExt(".pretty.nim")
+  em.config = config
+  em.fid = fileIdx
+  em.lastTok = tkInvalid
+  em.inquote = false
+  em.col = 0
+  em.content = newStringOfCap(16_000)
+  em.f = llStreamOpen(target, fmWrite)
+  if isNil(em.f): rawMessage(config, errGenerated, "cannot open file: " & target)
+
+proc closeEmitter*(em: var Emitter) =
+  ## Writes the buffered output to the stream and closes it; skipped if `f` is nil.
+  if em.f != nil: llStreamWrite(em.f, em.content); llStreamClose(em.f)
+
+proc countNewlines(s: string): int =
+  ## Returns how many line feed characters ('\L') occur in `s`.
+  for ch in s:
+    if ch == '\L': inc(result)
+
+proc calcCol(em: var Emitter; s: string) =
+  ## Sets `em.col` to the length of the last line of `s` (text after its final '\L').
+  em.col = 0
+  for k in countdown(s.high, 0):
+    if s[k] == '\L': break
+    inc(em.col)
+
+template wr(x) = # appends `x` to the output; expects `em` in scope at the call site
+  em.content.add x
+  inc em.col, x.len # advances the column by the full length of `x`
+
+template goodCol(col): bool = col in 40..MaxLineLen # where `rememberSplit` records
+
+const splitters = {tkComma, tkSemicolon, tkParLe, tkParDotLe,
+                   tkBracketLe, tkBracketLeColon, tkCurlyDotLe,
+                   tkCurlyLe} # `softLinebreak` breaks right after these tokens
+
+template rememberSplit(kind) = # expects `em` in scope at the call site
+  if goodCol(em.col): # positions left of column 40 are not recorded
+    em.altSplitPos[kind] = em.content.len # index where a break may be inserted
+
+proc softLinebreak(em: var Emitter, lit: string) =
+  ## Inserts a line break before `lit` is emitted if it would reach MaxLineLen.
+  # XXX Use the algorithm from https://llvm.org/devmtg/2013-04/jasper-slides.pdf
+  # +2 because we blindly assume a comma or ' &' might follow
+  if em.inquote or em.col+lit.len+2 < MaxLineLen: return
+  var a = em.content.len # after a splitter the break goes at the very end
+  if em.lastTok notin splitters:
+    a = -1 # otherwise take the first remembered position that is still valid
+    for p in em.altSplitPos:
+      if a < 0 and p > em.fixedUntil: a = p
+    if a < 0: return
+  let ws = "\L" & repeat(' ', em.indentLevel+2)
+  em.col = em.content.len - a + ws.len - 1 # indentation plus the moved tail
+  em.content.insert(ws, a)
+  for p in mitems(em.altSplitPos): # positions behind the break moved with it
+    if p > a: inc p, ws.len
+  em.fixedUntil = a + ws.len - 1 # the finished line must not be split again
+
+proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
+  ## Appends `tok` to the output buffer, emitting line breaks, indentation and spacing.
+  template endsInWhite(em): bool =
+    em.content.len > 0 and em.content[em.content.high] in {' ', '\L'}
+  template endsInAlpha(em): bool =
+    em.content.len > 0 and em.content[em.content.high] in SymChars+{'_'}
+
+  proc emitComment(em: var Emitter; tok: TToken) =
+    let lit = strip fileSection(em.config, em.fid, tok.commentOffsetA, tok.commentOffsetB)
+    em.lineSpan = countNewlines(lit)
+    if not endsInWhite(em):
+      wr(" ")
+      if em.lineSpan == 0 and max(em.col, LineCommentColumn) + lit.len <= MaxLineLen:
+        for i in 1 .. LineCommentColumn - em.col: wr(" ")
+    wr lit
+    if em.lineSpan > 0: calcCol(em, lit) # `wr` counted all of `lit`; keep its last line
+
+  var preventComment = false
+  if tok.tokType == tkComment and tok.line == em.lastLineNumber and tok.indent >= 0:
+    # we have an inline comment so handle it before the indentation token:
+    emitComment(em, tok)
+    preventComment = true
+    em.fixedUntil = em.content.high
+
+  elif tok.indent >= 0:
+    em.indentLevel = tok.indent
+    # remove trailing whitespace:
+    while em.content.len > 0 and em.content[em.content.high] == ' ':
+      setLen(em.content, em.content.len-1)
+    wr("\L")
+    for i in 2..tok.line - em.lastLineNumber: wr("\L")
+    em.col = 0
+    for i in 1..tok.indent:
+      wr(" ")
+    em.fixedUntil = em.content.high
+
+  case tok.tokType
+  of tokKeywordLow..tokKeywordHigh:
+    if endsInAlpha(em): wr(" ")
+    wr(TokTypeToStr[tok.tokType])
+
+    case tok.tokType
+    of tkAnd: rememberSplit(splitAnd)
+    of tkOr: rememberSplit(splitOr)
+    of tkIn: rememberSplit(splitIn)
+    else: discard
+
+  of tkColon:
+    wr(TokTypeToStr[tok.tokType])
+    wr(" ")
+  of tkSemicolon, tkComma:
+    wr(TokTypeToStr[tok.tokType])
+    wr(" ")
+    rememberSplit(splitComma)
+  of tkParLe, tkParRi, tkBracketLe,
+     tkBracketRi, tkCurlyLe, tkCurlyRi,
+     tkBracketDotLe, tkBracketDotRi,
+     tkCurlyDotLe, tkCurlyDotRi,
+     tkParDotLe, tkParDotRi,
+     tkColonColon, tkDot, tkBracketLeColon:
+    wr(TokTypeToStr[tok.tokType])
+    if tok.tokType in splitters:
+      rememberSplit(splitParLe)
+  of tkEquals:
+    if not em.endsInWhite: wr(" ")
+    wr(TokTypeToStr[tok.tokType])
+    wr(" ")
+  of tkOpr, tkDotDot:
+    if not em.endsInWhite: wr(" ")
+    wr(tok.ident.s)
+    template isUnary(tok): bool =
+      tok.strongSpaceB == 0 and tok.strongSpaceA > 0
+
+    if not isUnary(tok) or em.lastTok in {tkOpr, tkDotDot}:
+      wr(" ")
+      rememberSplit(splitBinary)
+  of tkAccent:
+    wr(TokTypeToStr[tok.tokType])
+    em.inquote = not em.inquote
+  of tkComment:
+    if not preventComment:
+      emitComment(em, tok)
+  of tkIntLit..tkStrLit, tkRStrLit, tkTripleStrLit, tkGStrLit, tkGTripleStrLit, tkCharLit:
+    let lit = fileSection(em.config, em.fid, tok.offsetA, tok.offsetB)
+    softLinebreak(em, lit)
+    if endsInAlpha(em) and tok.tokType notin {tkGStrLit, tkGTripleStrLit}: wr(" ")
+    em.lineSpan = countNewlines(lit)
+    wr lit
+    if em.lineSpan > 0: calcCol(em, lit) # column restarts at the literal's last line
+  of tkEof: discard
+  else:
+    let lit = if tok.ident != nil: tok.ident.s else: tok.literal
+    softLinebreak(em, lit)
+    if endsInAlpha(em): wr(" ")
+    wr lit
+
+  em.lastTok = tok.tokType
+  em.lastLineNumber = tok.line + em.lineSpan
+  em.lineSpan = 0
+
+proc starWasExportMarker*(em: var Emitter) =
+  ## Rewrites a trailing " * " in the output as "*", keeping `col` in sync.
+  if not endsWith(em.content, " * "): return
+  setLen(em.content, em.content.len - 2); dec(em.col, 2) # drops the "* " tail
+  em.content[em.content.high] = '*' # the remaining ' ' becomes the marker
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -949,6 +949,8 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
      if isDoc or defined(nimpretty): tok.literal.add buf[pos]
      inc(pos)
  L.bufpos = pos
+  when defined(nimpretty):
+    tok.commentOffsetB = L.offsetBase + pos - 1

 proc scanComment(L: var TLexer, tok: var TToken) =
  var pos = L.bufpos
@@ -957,6 +959,9 @@ proc scanComment(L: var TLexer, tok: var TToken) =
  # iNumber contains the number of '\n' in the token
  tok.iNumber = 0
  assert buf[pos+1] == '#'
+  when defined(nimpretty):
+    tok.commentOffsetA = L.offsetBase + pos - 1
+
  if buf[pos+2] == '[':
    skipMultiLineComment(L, tok, pos+3, true)
    return
@@ -996,6 +1001,8 @@ proc scanComment(L: var TLexer, tok: var TToken) =
      tokenEndIgnore(tok, pos)
      break
  L.bufpos = pos
+  when defined(nimpretty):
+    tok.commentOffsetB = L.offsetBase + pos - 1

 proc skip(L: var TLexer, tok: var TToken) =
  var pos = L.bufpos
@@ -1016,6 +1023,9 @@ proc skip(L: var TLexer, tok: var TToken) =
      inc(pos)
    of CR, LF:
      tokenEndPrevious(tok, pos)
+      when defined(nimpretty):
+        # we are not yet in a comment, so update the comment token's line information:
+        if not hasComment: inc tok.line
      pos = handleCRLF(L, pos)
      buf = L.buf
      var indent = 0
@@ -1055,7 +1065,7 @@ proc skip(L: var TLexer, tok: var TToken) =
  L.bufpos = pos
  when defined(nimpretty):
    if hasComment:
-      tok.commentOffsetB = L.offsetBase + pos
+      tok.commentOffsetB = L.offsetBase + pos - 1
      tok.tokType = tkComment
    if gIndentationWidth <= 0:
      gIndentationWidth = tok.indent
--- a/compiler/parser.nim
+++ b/compiler/parser.nim
@@ -29,6 +29,9 @@ when isMainModule:
 import
  llstream, lexer, idents, strutils, ast, astalgo, msgs, options, lineinfos

+when defined(nimpretty2):
+  import layouter
+
 type
  TParser* = object            # A TParser object represents a file that
                               # is being parsed
@@ -41,6 +44,8 @@ type
    inPragma*: int             # Pragma level
    inSemiStmtList*: int
    emptyNode: PNode
+    when defined(nimpretty2):
+      em: Emitter

  SymbolMode = enum
    smNormal, smAllowNil, smAfterDot
@@ -83,6 +88,11 @@ proc getTok(p: var TParser) =
  ## `tok` member.
  rawGetTok(p.lex, p.tok)
  p.hasProgress = true
+  when defined(nimpretty2):
+    emitTok(p.em, p.lex, p.tok)
+    while p.tok.tokType == tkComment:
+      rawGetTok(p.lex, p.tok)
+      emitTok(p.em, p.lex, p.tok)

 proc openParser*(p: var TParser, fileIdx: FileIndex, inputStream: PLLStream,
                 cache: IdentCache; config: ConfigRef;
@@ -91,6 +101,8 @@ proc openParser*(p: var TParser, fileIdx: FileIndex, inputStream: PLLStream,
  ##
  initToken(p.tok)
  openLexer(p.lex, fileIdx, inputStream, cache, config)
+  when defined(nimpretty2):
+    openEmitter(p.em, config, fileIdx)
  getTok(p)                   # read the first token
  p.firstTok = true
  p.strongSpaces = strongSpaces
@@ -104,6 +116,8 @@ proc openParser*(p: var TParser, filename: string, inputStream: PLLStream,
 proc closeParser(p: var TParser) =
  ## Close a parser, freeing up its resources.
  closeLexer(p.lex)
+  when defined(nimpretty2):
+    closeEmitter(p.em)

 proc parMessage(p: TParser, msg: TMsgKind, arg = "") =
  ## Produce and emit the parser message `arg` to output.
@@ -907,6 +921,8 @@ proc identVis(p: var TParser; allowDot=false): PNode =
  #| identVisDot = symbol '.' optInd symbol opr?
  var a = parseSymbol(p)
  if p.tok.tokType == tkOpr:
+    when defined(nimpretty2):
+      starWasExportMarker(p.em)
    result = newNodeP(nkPostfix, p)
    addSon(result, newIdentNodeP(p.tok.ident, p))
    addSon(result, a)
--- a/compiler/renderer.nim
+++ b/compiler/renderer.nim
@@ -1453,11 +1453,12 @@ proc `$`*(n: PNode): string = n.renderTree

 proc renderModule*(n: PNode, infile, outfile: string,
                   renderFlags: TRenderFlags = {};
-                   fid = FileIndex(-1)) =
+                   fid = FileIndex(-1);
+                   conf: ConfigRef = nil) =
  var
    f: File
    g: TSrcGen
-  initSrcGen(g, renderFlags, newPartialConfigRef())
+  initSrcGen(g, renderFlags, conf)
  g.fid = fid
  for i in countup(0, sonsLen(n) - 1):
    gsub(g, n.sons[i])
--- a/tools/nimpretty.nim
+++ b/tools/nimpretty.nim
@@ -42,9 +42,12 @@ proc writeVersion() =
 proc prettyPrint(infile: string) =
  let conf = newConfigRef()
  let fileIdx = fileInfoIdx(conf, infile)
-  let tree = parseFile(fileIdx, newIdentCache(), conf)
-  let outfile = changeFileExt(infile, ".pretty.nim")
-  renderModule(tree, infile, outfile, {}, fileIdx)
+  when defined(nimpretty2):
+    discard parseFile(fileIdx, newIdentCache(), conf)
+  else:
+    let tree = parseFile(fileIdx, newIdentCache(), conf)
+    let outfile = changeFileExt(infile, ".pretty.nim")
+    renderModule(tree, infile, outfile, {}, fileIdx, conf)

 proc main =
  var infile: string
--- a/tools/nimpretty.nim.cfg
+++ b/tools/nimpretty.nim.cfg
@@ -0,0 +1,2 @@
+--define: nimpretty
+--define: nimpretty2