nimpretty: smart tabs support, fixes #9399 [bugfix]

2026-02-17 00:24:16 +00:00 · 2019-06-10 10:50:02 +02:00
parent 22a1b12038
commit caf93f4355
6 changed files with 159 additions and 59 deletions
--- a/compiler/layouter.nim
+++ b/compiler/layouter.nim
@@ -16,7 +16,6 @@ from sequtils import delete

 const
  MaxLineLen = 80
-  LineCommentColumn = 30

 type
  SplitKind = enum
@@ -26,8 +25,10 @@ type
    detectSemicolonKind, useSemicolon, dontTouch

  LayoutToken = enum
-    ltSpaces, ltNewline, ltComment, ltLit, ltKeyword, ltExportMarker, ltIdent,
-    ltOther, ltOpr
+    ltSpaces, ltNewline, ltTab,  # ltTab: alignment marker that closeEmitter writes as one or more spaces
+    ltComment, ltLit, ltKeyword, ltExportMarker, ltIdent,
+    ltOther, ltOpr,
+    ltBeginSection, ltEndSection  # empty-text markers; closeEmitter sets/resets its alignment column on them

  Emitter* = object
    config: ConfigRef
@@ -60,12 +61,59 @@ proc openEmitter*(em: var Emitter, cache: IdentCache;
  em.indentStack.add 0
  em.lastLineNumber = 1

+proc computeMax(em: Emitter; pos: int): int =  # widest pre-tab text among the lines from token `pos` to the section end
+  var p = pos
+  result = 0
+  while p < em.tokens.len and em.kinds[p] != ltEndSection:  # one pass per line, until ltEndSection or the last token
+    var lhs = 0  # summed token lengths seen before the line's first ltTab
+    var lineLen = 0  # summed token lengths of the whole line; ltTab tokens are not counted
+    var foundTab = false
+    while p < em.tokens.len and em.kinds[p] != ltEndSection:  # NOTE(review): a line ended by ltEndSection never reaches max() below - confirm intended
+      if em.kinds[p] == ltNewline:
+        if foundTab and lineLen <= MaxLineLen: result = max(result, lhs)  # lines without a tab, or longer than MaxLineLen, are ignored
+        inc p  # step past the newline so the outer loop starts on the next line
+        break
+      if em.kinds[p] == ltTab:
+        foundTab = true
+      else:
+        if not foundTab:
+          inc lhs, em.tokens[p].len
+        inc lineLen, em.tokens[p].len
+      inc p
+
+proc computeRhs(em: Emitter; pos: int): int =  # summed token lengths from `pos` up to the next ltNewline
+  var p = pos
+  result = 0
+  while p < em.tokens.len and em.kinds[p] != ltNewline:  # the token at `pos` itself is included in the sum
+    inc result, em.tokens[p].len
+    inc p
+
 proc closeEmitter*(em: var Emitter) =
  let outFile = em.config.absOutFile

  var content = newStringOfCap(16_000)
+  var maxLhs = 0
+  var lineLen = 0
+  var lineBegin = 0
  for i in 0..em.tokens.high:
-    content.add em.tokens[i]
+    case em.kinds[i]
+    of ltBeginSection:
+      maxLhs = computeMax(em, lineBegin)
+    of ltEndSection:
+      maxLhs = 0
+    of ltTab:
+      if maxLhs == 0 or computeRhs(em, i)+maxLhs > MaxLineLen:
+        content.add ' '
+      else:
+        let spaces = max(maxLhs - lineLen + 1, 1)
+        for j in 1..spaces: content.add ' '
+    of ltNewline:
+      content.add em.tokens[i]
+      lineLen = 0
+      lineBegin = i+1
+    else:
+      content.add em.tokens[i]
+      inc lineLen, em.tokens[i].len

  if fileExists(outFile) and readFile(outFile.string) == content:
    discard "do nothing, see #9499"
@@ -107,6 +155,12 @@ proc wrSpaces(em: var Emitter; spaces: int) =
 proc wrSpace(em: var Emitter) =
  wr(em, " ", ltSpaces)

+proc wrTab(em: var Emitter) =  # emit an alignment marker instead of a plain space
+  wr(em, " ", ltTab)  # stored as one space tagged ltTab; closeEmitter decides how many spaces to write
+
+proc beginSection*(em: var Emitter) = wr(em, "", ltBeginSection)  # empty-text marker: closeEmitter computes maxLhs when it reaches it
+proc endSection*(em: var Emitter) = wr(em, "", ltEndSection)  # empty-text marker: closeEmitter resets maxLhs to 0 when it reaches it
+
 proc removeSpaces(em: var Emitter) =
  while em.kinds.len > 0 and em.kinds[^1] == ltSpaces:
    let tokenLen = em.tokens[^1].len
@@ -197,30 +251,42 @@ proc emitMultilineComment(em: var Emitter, lit: string, col: int) =
    wr em, stripped, ltComment
    inc i

+proc lastChar(s: string): char =  # last character of `s`, or '\0' when `s` is empty
+  result = if s.len > 0: s[s.high] else: '\0'
+
+proc endsInWhite(em: Emitter): bool =  # is the last non-marker token ltSpaces, ltNewline or ltTab?
+  var i = em.tokens.len-1
+  while i >= 0 and em.kinds[i] in {ltBeginSection, ltEndSection}: dec(i)  # skip trailing section markers
+  result = if i >= 0: em.kinds[i] in {ltSpaces, ltNewline, ltTab} else: true  # no token left counts as whitespace
+
+proc endsInNewline(em: Emitter): bool =  # is the last token, ignoring markers and spaces, ltNewline or ltTab?
+  var i = em.tokens.len-1
+  while i >= 0 and em.kinds[i] in {ltBeginSection, ltEndSection, ltSpaces}: dec(i)  # skip section markers and ltSpaces
+  result = if i >= 0: em.kinds[i] in {ltNewline, ltTab} else: true  # ltTab is accepted too; no token left counts as true
+
+proc endsInAlpha(em: Emitter): bool =  # does the last non-marker token end in a SymChars character or '_'?
+  var i = em.tokens.len-1
+  while i >= 0 and em.kinds[i] in {ltBeginSection, ltEndSection}: dec(i)  # skip trailing section markers
+  result = if i >= 0: em.tokens[i].lastChar in SymChars+{'_'} else: false  # false when no token is left
+
+proc emitComment(em: var Emitter; tok: TToken) =  # emit the comment of `tok`, preceded by an ltTab marker when needed
+  let col = em.col  # captured before the calcCol call below; handed to emitMultilineComment
+  let lit = strip fileSection(em.config, em.fid, tok.commentOffsetA, tok.commentOffsetB)  # text between the token's comment offsets, stripped
+  em.lineSpan = countNewlines(lit)
+  if em.lineSpan > 0: calcCol(em, lit)
+  if em.lineSpan == 0:  # no newline counted in the comment text
+    if not endsInNewline(em):  # last token (ignoring markers/spaces) is neither ltNewline nor ltTab
+      wrTab em
+    wr em, lit, ltComment
+  else:  # comment text contains at least one counted newline
+    if not endsInWhite(em):
+      wrTab em
+    emitMultilineComment(em, lit, col)
+
 proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
-
-  template endsInWhite(em): bool =
-    em.kinds.len == 0 or em.kinds[em.kinds.high] in {ltSpaces, ltNewline}
-  template endsInAlpha(em): bool =
-    em.tokens.len > 0 and em.tokens[em.tokens.high][^1] in SymChars+{'_'}
-
  template wasExportMarker(em): bool =
    em.kinds.len > 0 and em.kinds[^1] == ltExportMarker

-  proc emitComment(em: var Emitter; tok: TToken) =
-    let col = em.col
-    let lit = strip fileSection(em.config, em.fid, tok.commentOffsetA, tok.commentOffsetB)
-    em.lineSpan = countNewlines(lit)
-    if em.lineSpan > 0: calcCol(em, lit)
-    if not endsInWhite(em):
-      wrSpace em
-      if em.lineSpan == 0 and max(em.col, LineCommentColumn) + lit.len <= MaxLineLen:
-        wrSpaces em, LineCommentColumn - em.col
-    if em.lineSpan == 0:
-      wr em, lit, ltComment
-    else:
-      emitMultilineComment(em, lit, col)
-
  if tok.tokType == tkComment and tok.literal.startsWith("#!nimpretty"):
    case tok.literal
    of "#!nimpretty off":
--- a/compiler/parser.nim
+++ b/compiler/parser.nim
@@ -30,7 +30,7 @@ import
  llstream, lexer, idents, strutils, ast, astalgo, msgs, options, lineinfos,
  pathutils

-when defined(nimpretty2):
+when defined(nimpretty):
  import layouter

 type
@@ -44,7 +44,7 @@ type
    inPragma*: int             # Pragma level
    inSemiStmtList*: int
    emptyNode: PNode
-    when defined(nimpretty2):
+    when defined(nimpretty):
      em*: Emitter

  SymbolMode = enum
@@ -89,12 +89,17 @@ proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode

 # implementation

+template prettySection(body) =  # run `body`, wrapped in beginSection/endSection when `nimpretty` is defined
+  when defined(nimpretty): beginSection(p.em)  # `p` is not a template parameter; it must be in scope where the template is used
+  body
+  when defined(nimpretty): endSection(p.em)  # without `nimpretty` only `body` remains
+
 proc getTok(p: var TParser) =
  ## Get the next token from the parser's lexer, and store it in the parser's
  ## `tok` member.
  rawGetTok(p.lex, p.tok)
  p.hasProgress = true
-  when defined(nimpretty2):
+  when defined(nimpretty):
    emitTok(p.em, p.lex, p.tok)
    # skip the additional tokens that nimpretty needs but the parser has no
    # interest in:
@@ -108,7 +113,7 @@ proc openParser*(p: var TParser, fileIdx: FileIndex, inputStream: PLLStream,
  ##
  initToken(p.tok)
  openLexer(p.lex, fileIdx, inputStream, cache, config)
-  when defined(nimpretty2):
+  when defined(nimpretty):
    openEmitter(p.em, cache, config, fileIdx)
  getTok(p)                   # read the first token
  p.firstTok = true
@@ -121,7 +126,7 @@ proc openParser*(p: var TParser, filename: AbsoluteFile, inputStream: PLLStream,
 proc closeParser(p: var TParser) =
  ## Close a parser, freeing up its resources.
  closeLexer(p.lex)
-  when defined(nimpretty2):
+  when defined(nimpretty):
    closeEmitter(p.em)

 proc parMessage(p: TParser, msg: TMsgKind, arg = "") =
@@ -384,7 +389,7 @@ proc exprColonEqExpr(p: var TParser): PNode =

 proc exprList(p: var TParser, endTok: TTokType, result: PNode) =
  #| exprList = expr ^+ comma
-  when defined(nimpretty2):
+  when defined(nimpretty):
    inc p.em.doIndentMore
  getTok(p)
  optInd(p, result)
@@ -395,7 +400,7 @@ proc exprList(p: var TParser, endTok: TTokType, result: PNode) =
    if p.tok.tokType != tkComma: break
    getTok(p)
    optInd(p, a)
-  when defined(nimpretty2):
+  when defined(nimpretty):
    dec p.em.doIndentMore

 proc exprColonEqExprListAux(p: var TParser, endTok: TTokType, result: PNode) =
@@ -830,10 +835,10 @@ proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode =
  result = parseOperators(p, result, limit, mode)

 proc simpleExpr(p: var TParser, mode = pmNormal): PNode =
-  when defined(nimpretty2):
+  when defined(nimpretty):
    inc p.em.doIndentMore
  result = simpleExprAux(p, -1, mode)
-  when defined(nimpretty2):
+  when defined(nimpretty):
    dec p.em.doIndentMore

 proc parseIfExpr(p: var TParser, kind: TNodeKind): PNode =
@@ -910,7 +915,7 @@ proc parsePragma(p: var TParser): PNode =
      skipComment(p, a)
  optPar(p)
  if p.tok.tokType in {tkCurlyDotRi, tkCurlyRi}:
-    when defined(nimpretty2):
+    when defined(nimpretty):
      if p.tok.tokType == tkCurlyRi: curlyRiWasPragma(p.em)
    getTok(p)
  else:
@@ -922,7 +927,7 @@ proc identVis(p: var TParser; allowDot=false): PNode =
  #| identVisDot = symbol '.' optInd symbol opr?
  var a = parseSymbol(p)
  if p.tok.tokType == tkOpr:
-    when defined(nimpretty2):
+    when defined(nimpretty):
      starWasExportMarker(p.em)
    result = newNodeP(nkPostfix, p)
    addSon(result, newIdentNodeP(p.tok.ident, p))
@@ -1001,7 +1006,7 @@ proc parseTuple(p: var TParser, indentAllowed = false): PNode =
      var a = parseIdentColonEquals(p, {})
      addSon(result, a)
      if p.tok.tokType notin {tkComma, tkSemiColon}: break
-      when defined(nimpretty2):
+      when defined(nimpretty):
        commaWasSemicolon(p.em)
      getTok(p)
      skipComment(p, a)
@@ -1037,7 +1042,7 @@ proc parseParamList(p: var TParser, retColon = true): PNode =
  var a: PNode
  result = newNodeP(nkFormalParams, p)
  addSon(result, p.emptyNode) # return type
-  when defined(nimpretty2):
+  when defined(nimpretty):
    inc p.em.doIndentMore
    inc p.em.keepIndents
  let hasParLe = p.tok.tokType == tkParLe and p.tok.indent < 0
@@ -1059,7 +1064,7 @@ proc parseParamList(p: var TParser, retColon = true): PNode =
        break
      addSon(result, a)
      if p.tok.tokType notin {tkComma, tkSemiColon}: break
-      when defined(nimpretty2):
+      when defined(nimpretty):
        commaWasSemicolon(p.em)
      getTok(p)
      skipComment(p, a)
@@ -1074,7 +1079,7 @@ proc parseParamList(p: var TParser, retColon = true): PNode =
  elif not retColon and not hasParle:
    # Mark as "not there" in order to mark for deprecation in the semantic pass:
    result = p.emptyNode
-  when defined(nimpretty2):
+  when defined(nimpretty):
    dec p.em.doIndentMore
    dec p.em.keepIndents

@@ -1237,13 +1242,15 @@ proc primary(p: var TParser, mode: TPrimaryMode): PNode =
    else: result.kind = nkIteratorTy
  of tkEnum:
    if mode == pmTypeDef:
-      result = parseEnum(p)
+      prettySection:
+        result = parseEnum(p)
    else:
      result = newNodeP(nkEnumTy, p)
      getTok(p)
  of tkObject:
    if mode == pmTypeDef:
-      result = parseObject(p)
+      prettySection:
+        result = parseObject(p)
    else:
      result = newNodeP(nkObjectTy, p)
      getTok(p)
@@ -1696,7 +1703,7 @@ proc parseGenericParamList(p: var TParser): PNode =
    var a = parseGenericParam(p)
    addSon(result, a)
    if p.tok.tokType notin {tkComma, tkSemiColon}: break
-    when defined(nimpretty2):
+    when defined(nimpretty):
      commaWasSemicolon(p.em)
    getTok(p)
    skipComment(p, a)
@@ -2187,10 +2194,16 @@ proc complexOrSimpleStmt(p: var TParser): PNode =
      result = parseOperators(p, result, -1, pmNormal)
    else:
      result = parseSection(p, nkTypeSection, parseTypeDef)
-  of tkConst: result = parseSection(p, nkConstSection, parseConstant)
-  of tkLet: result = parseSection(p, nkLetSection, parseVariable)
+  of tkConst:
+    prettySection:
+      result = parseSection(p, nkConstSection, parseConstant)
+  of tkLet:
+    prettySection:
+      result = parseSection(p, nkLetSection, parseVariable)
+  of tkVar:
+    prettySection:
+      result = parseSection(p, nkVarSection, parseVariable)
  of tkWhen: result = parseIfOrWhen(p, nkWhenStmt)
-  of tkVar: result = parseSection(p, nkVarSection, parseVariable)
  of tkBind: result = parseBind(p, nkBindStmt)
  of tkMixin: result = parseBind(p, nkMixinStmt)
  of tkUsing: result = parseSection(p, nkUsingStmt, parseVariable)
--- a/nimpretty/nimpretty.nim
+++ b/nimpretty/nimpretty.nim
@@ -50,15 +50,11 @@ proc prettyPrint(infile, outfile: string, opt: PrettyOptions) =
  var conf = newConfigRef()
  let fileIdx = fileInfoIdx(conf, AbsoluteFile infile)
  conf.outFile = RelativeFile outfile
-  when defined(nimpretty2):
-    var p: TParsers
-    p.parser.em.indWidth = opt.indWidth
-    if setupParsers(p, fileIdx, newIdentCache(), conf):
-      discard parseAll(p)
-      closeParsers(p)
-  else:
-    let tree = parseFile(fileIdx, newIdentCache(), conf)
-    renderModule(tree, infile, outfile, {}, fileIdx, conf)
+  var p: TParsers
+  p.parser.em.indWidth = opt.indWidth
+  if setupParsers(p, fileIdx, newIdentCache(), conf):
+    discard parseAll(p)
+    closeParsers(p)

 proc main =
  var infile, outfile: string
--- a/nimpretty/nimpretty.nim.cfg
+++ b/nimpretty/nimpretty.nim.cfg
@@ -1,3 +1,2 @@
 --define: nimpretty
--define: nimpretty2
 --define: nimOldCaseObjects
--- a/nimpretty/tests/exhaustive.nim
+++ b/nimpretty/tests/exhaustive.nim
@@ -396,3 +396,16 @@ proc main() =
    discard

 main()
+
+type
+  TCallingConvention* = enum
+    ccDefault,                # proc has no explicit calling convention
+    ccStdCall,    # procedure is stdcall
+    ccCDecl,                  # cdecl
+    ccSafeCall,               # safecall
+    ccSysCall, # system call
+    ccInline,                 # proc should be inlined
+    ccNoInline,               # proc should not be inlined
+    ccFastCall,               # fastcall (pass parameters in registers)
+    ccClosure,        # proc has a closure
+    ccNoConvention       # needed for generating proper C procs sometimes
--- a/nimpretty/tests/expected/exhaustive.nim
+++ b/nimpretty/tests/expected/exhaustive.nim
@@ -29,10 +29,10 @@ var x = 1

 type
  GeneralTokenizer* = object of RootObj ## comment here
-    kind*: TokenClass         ## and here
-    start*, length*: int      ## you know how it goes...
+    kind*: TokenClass                   ## and here
+    start*, length*: int                ## you know how it goes...
    buf: cstring
-    pos: int                  # other comment here.
+    pos: int                            # other comment here.
    state: TokenClass

 var x*: string
@@ -122,7 +122,7 @@ type
    inquote {.pragmaHereWrongCurlyEnd.}: bool
    col, lastLineNumber, lineSpan, indentLevel: int
    content: string
-    fixedUntil: int           # marks where we must not go in the content
+    fixedUntil: int # marks where we must not go in the content
    altSplitPos: array[SplitKind, int] # alternative split positions

 proc openEmitter*[T, S](em: var Emitter; config: ConfigRef;
@@ -406,3 +406,16 @@ proc main() =
  discard

 main()
+
+type
+  TCallingConvention* = enum
+    ccDefault,     # proc has no explicit calling convention
+    ccStdCall,     # procedure is stdcall
+    ccCDecl,       # cdecl
+    ccSafeCall,    # safecall
+    ccSysCall,     # system call
+    ccInline,      # proc should be inlined
+    ccNoInline,    # proc should not be inlined
+    ccFastCall,    # fastcall (pass parameters in registers)
+    ccClosure,     # proc has a closure
+    ccNoConvention # needed for generating proper C procs sometimes