some Token refactors (#21762)

* test some Token refactors
* fix CI
* showcase for more reductions, will revert
* Revert "showcase for more reductions, will revert"
  This reverts commit 5ba48591f4.
* make line and column int32
* remove int32 change
2026-02-11 22:08:54 +00:00 · 2023-05-06 22:27:28 +03:00
parent d0c62fa169
commit ba698d37c3
5 changed files with 28 additions and 33 deletions
--- a/compiler/layouter.nim
+++ b/compiler/layouter.nim
@@ -510,7 +510,7 @@ proc emitTok*(em: var Emitter; L: Lexer; tok: Token) =
    rememberSplit(splitComma)
    wrSpace em
  of openPars:
-    if tok.strongSpaceA and not em.endsInWhite and
+    if tsLeading in tok.spacing and not em.endsInWhite and
        (not em.wasExportMarker or tok.tokType == tkCurlyDotLe):
      wrSpace em
    wr(em, $tok.tokType, ltSomeParLe)
@@ -528,7 +528,7 @@ proc emitTok*(em: var Emitter; L: Lexer; tok: Token) =
    wr(em, $tok.tokType, ltOther)
    if not em.inquote: wrSpace(em)
  of tkOpr, tkDotDot:
-    if em.inquote or (((not tok.strongSpaceA) and tok.strongSpaceB == tsNone) and
+    if em.inquote or (tok.spacing == {} and
        tok.ident.s notin ["<", ">", "<=", ">=", "==", "!="]):
      # bug #9504: remember to not spacify a keyword:
      lastTokWasTerse = true
@@ -538,7 +538,7 @@ proc emitTok*(em: var Emitter; L: Lexer; tok: Token) =
      if not em.endsInWhite: wrSpace(em)
      wr(em, tok.ident.s, ltOpr)
      template isUnary(tok): bool =
-        tok.strongSpaceB == tsNone and tok.strongSpaceA
+        tok.spacing == {tsLeading}

      if not isUnary(tok):
        rememberSplit(splitBinary)
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -94,19 +94,18 @@ type
    base2, base8, base16

  TokenSpacing* = enum
-    tsNone, tsTrailing, tsEof
+    tsLeading, tsTrailing, tsEof

  Token* = object                # a Nim token
    tokType*: TokType            # the type of the token
+    base*: NumericalBase         # the numerical base; only valid for int
+                                 # or float literals
+    spacing*: set[TokenSpacing]  # spaces around token
    indent*: int                 # the indentation; != -1 if the token has been
                                 # preceded with indentation
    ident*: PIdent               # the parsed identifier
    iNumber*: BiggestInt         # the parsed integer literal
    fNumber*: BiggestFloat       # the parsed floating point literal
-    base*: NumericalBase         # the numerical base; only valid for int
-                                 # or float literals
-    strongSpaceA*: bool          # leading spaces of an operator
-    strongSpaceB*: TokenSpacing  # trailing spaces of an operator
    literal*: string             # the parsed (string) literal; and
                                 # documentation comments are here too
    line*, col*: int
@@ -178,7 +177,7 @@ proc initToken*(L: var Token) =
  L.tokType = tkInvalid
  L.iNumber = 0
  L.indent = 0
-  L.strongSpaceA = false
+  L.spacing = {}
  L.literal = ""
  L.fNumber = 0.0
  L.base = base10
@@ -191,7 +190,7 @@ proc fillToken(L: var Token) =
  L.tokType = tkInvalid
  L.iNumber = 0
  L.indent = 0
-  L.strongSpaceA = false
+  L.spacing = {}
  setLen(L.literal, 0)
  L.fNumber = 0.0
  L.base = base10
@@ -960,13 +959,15 @@ proc getOperator(L: var Lexer, tok: var Token) =
  tokenEnd(tok, pos-1)
  # advance pos but don't store it in L.bufpos so the next token (which might
  # be an operator too) gets the preceding spaces:
-  tok.strongSpaceB = tsNone
+  tok.spacing = tok.spacing - {tsTrailing, tsEof}
+  var trailing = false
  while L.buf[pos] == ' ':
    inc pos
-    if tok.strongSpaceB != tsTrailing:
-      tok.strongSpaceB = tsTrailing
+    trailing = true
  if L.buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
-    tok.strongSpaceB = tsEof
+    tok.spacing.incl(tsEof)
+  elif trailing:
+    tok.spacing.incl(tsTrailing)

 proc getPrecedence*(tok: Token): int =
  ## Calculates the precedence of the given token.
@@ -1077,7 +1078,6 @@ proc skipMultiLineComment(L: var Lexer; tok: var Token; start: int;
      when defined(nimpretty): tok.literal.add "\L"
      if isDoc:
        when not defined(nimpretty): tok.literal.add "\n"
-        inc tok.iNumber
        var c = toStrip
        while L.buf[pos] == ' ' and c > 0:
          inc pos
@@ -1096,8 +1096,6 @@ proc skipMultiLineComment(L: var Lexer; tok: var Token; start: int;
 proc scanComment(L: var Lexer, tok: var Token) =
  var pos = L.bufpos
  tok.tokType = tkComment
-  # iNumber contains the number of '\n' in the token
-  tok.iNumber = 0
  assert L.buf[pos+1] == '#'
  when defined(nimpretty):
    tok.commentOffsetA = L.offsetBase + pos
@@ -1140,7 +1138,6 @@ proc scanComment(L: var Lexer, tok: var Token) =
        while L.buf[pos] == ' ' and c > 0:
          inc pos
          dec c
-        inc tok.iNumber
    else:
      if L.buf[pos] > ' ':
        L.indentAhead = indent
@@ -1153,7 +1150,7 @@ proc scanComment(L: var Lexer, tok: var Token) =
 proc skip(L: var Lexer, tok: var Token) =
  var pos = L.bufpos
  tokenBegin(tok, pos)
-  tok.strongSpaceA = false
+  tok.spacing.excl(tsLeading)
  when defined(nimpretty):
    var hasComment = false
    var commentIndent = L.currLineIndent
@@ -1164,8 +1161,7 @@ proc skip(L: var Lexer, tok: var Token) =
    case L.buf[pos]
    of ' ':
      inc(pos)
-      if not tok.strongSpaceA:
-        tok.strongSpaceA = true
+      tok.spacing.incl(tsLeading)
    of '\t':
      if not L.allowTabs: lexMessagePos(L, errGenerated, pos, "tabs are not allowed, use spaces instead")
      inc(pos)
@@ -1187,7 +1183,7 @@ proc skip(L: var Lexer, tok: var Token) =
          pos = L.bufpos
        else:
          break
-      tok.strongSpaceA = false
+      tok.spacing.excl(tsLeading)
      when defined(nimpretty):
        if L.buf[pos] == '#' and tok.line < 0: commentIndent = indent
      if L.buf[pos] > ' ' and (L.buf[pos] != '#' or L.buf[pos+1] == '#'):
--- a/compiler/parser.nim
+++ b/compiler/parser.nim
@@ -301,14 +301,13 @@ proc isRightAssociative(tok: Token): bool {.inline.} =
 proc isUnary(tok: Token): bool =
  ## Check if the given token is a unary operator
  tok.tokType in {tkOpr, tkDotDot} and
-  tok.strongSpaceB == tsNone and
-  tok.strongSpaceA
+  tok.spacing == {tsLeading}

 proc checkBinary(p: Parser) {.inline.} =
  ## Check if the current parser token is a binary operator.
  # we don't check '..' here as that's too annoying
  if p.tok.tokType == tkOpr:
-    if p.tok.strongSpaceB == tsTrailing and not p.tok.strongSpaceA:
+    if p.tok.spacing == {tsTrailing}:
      parMessage(p, warnInconsistentSpacing, prettyTok(p.tok))

 #| module = stmt ^* (';' / IND{=})
@@ -516,7 +515,7 @@ proc dotExpr(p: var Parser, a: PNode): PNode =
  optInd(p, result)
  result.add(a)
  result.add(parseSymbol(p, smAfterDot))
-  if p.tok.tokType == tkBracketLeColon and not p.tok.strongSpaceA:
+  if p.tok.tokType == tkBracketLeColon and tsLeading notin p.tok.spacing:
    var x = newNodeI(nkBracketExpr, p.parLineInfo)
    # rewrite 'x.y[:z]()' to 'y[z](x)'
    x.add result[1]
@@ -525,7 +524,7 @@ proc dotExpr(p: var Parser, a: PNode): PNode =
    var y = newNodeI(nkCall, p.parLineInfo)
    y.add x
    y.add result[0]
-    if p.tok.tokType == tkParLe and not p.tok.strongSpaceA:
+    if p.tok.tokType == tkParLe and tsLeading notin p.tok.spacing:
      exprColonEqExprListAux(p, tkParRi, y)
    result = y

@@ -883,7 +882,7 @@ proc primarySuffix(p: var Parser, r: PNode,
    case p.tok.tokType
    of tkParLe:
      # progress guaranteed
-      if p.tok.strongSpaceA:
+      if tsLeading in p.tok.spacing:
        result = commandExpr(p, result, mode)
        break
      result = namedParams(p, result, nkCall, tkParRi)
@@ -895,13 +894,13 @@ proc primarySuffix(p: var Parser, r: PNode,
      result = parseGStrLit(p, result)
    of tkBracketLe:
      # progress guaranteed
-      if p.tok.strongSpaceA:
+      if tsLeading in p.tok.spacing:
        result = commandExpr(p, result, mode)
        break
      result = namedParams(p, result, nkBracketExpr, tkBracketRi)
    of tkCurlyLe:
      # progress guaranteed
-      if p.tok.strongSpaceA:
+      if tsLeading in p.tok.spacing:
        result = commandExpr(p, result, mode)
        break
      result = namedParams(p, result, nkCurlyExpr, tkCurlyRi)
@@ -2525,7 +2524,7 @@ proc parseAll(p: var Parser): PNode =
  setEndInfo()

 proc checkFirstLineIndentation*(p: var Parser) =
-  if p.tok.indent != 0 and p.tok.strongSpaceA:
+  if p.tok.indent != 0 and tsLeading in p.tok.spacing:
    parMessage(p, errInvalidIndentation)

 proc parseTopLevelStmt(p: var Parser): PNode =
--- a/nimpretty/tests/exhaustive.nim
+++ b/nimpretty/tests/exhaustive.nim
@@ -267,7 +267,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
    if not em.endsInWhite: wr(" ")
    wr(tok.ident.s)
    template isUnary(tok): bool =
-      tok.strongSpaceB == tsNone and tok.strongSpaceA
+      tok.spacing == {tsLeading}

    if not isUnary(tok) or em.lastTok in {tkOpr, tkDotDot}:
      wr(" ")
--- a/nimpretty/tests/expected/exhaustive.nim
+++ b/nimpretty/tests/expected/exhaustive.nim
@@ -272,7 +272,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
    if not em.endsInWhite: wr(" ")
    wr(tok.ident.s)
    template isUnary(tok): bool =
-      tok.strongSpaceB == tsNone and tok.strongSpaceA
+      tok.spacing == {tsLeading}

    if not isUnary(tok) or em.lastTok in {tkOpr, tkDotDot}:
      wr(" ")