lexer cleanups (#23037)

* remove some dead code and leftovers from past features
* fix yaml printing of uint64 literals
Author: Jacek Sieka
Date: 2023-12-06 18:17:57 +01:00
Committed by: GitHub
Parent: 44b64e726e
Commit: e1a0ff1b8a
7 changed files with 20 additions and 76 deletions

compiler/astalgo.nim

@@ -370,7 +370,7 @@ proc treeToYamlAux(conf: ConfigRef; n: PNode, marker: var IntSet, indent: int,
   if conf != nil:
     result.addf(",$N$1\"info\": $2", [istr, lineInfoToStr(conf, n.info)])
   case n.kind
-  of nkCharLit..nkInt64Lit:
+  of nkCharLit..nkUInt64Lit:
     result.addf(",$N$1\"intVal\": $2", [istr, rope(n.intVal)])
   of nkFloatLit, nkFloat32Lit, nkFloat64Lit:
     result.addf(",$N$1\"floatVal\": $2",

compiler/lexer.nim

@@ -122,7 +122,6 @@ type
                               # this is needed because scanning comments
                               # needs so much look-ahead
     currLineIndent*: int
-    strongSpaces*, allowTabs*: bool
     errorHandler*: ErrorHandler
     cache*: IdentCache
     when defined(nimsuggest):
@@ -176,32 +175,6 @@ proc printTok*(conf: ConfigRef; tok: Token) =
   # xxx factor with toLocation
   msgWriteln(conf, $tok.line & ":" & $tok.col & "\t" & $tok.tokType & " " & $tok)
 
-proc initToken*(L: var Token) =
-  L.tokType = tkInvalid
-  L.iNumber = 0
-  L.indent = 0
-  L.spacing = {}
-  L.literal = ""
-  L.fNumber = 0.0
-  L.base = base10
-  L.ident = nil
-  when defined(nimpretty):
-    L.commentOffsetA = 0
-    L.commentOffsetB = 0
-
-proc fillToken(L: var Token) =
-  L.tokType = tkInvalid
-  L.iNumber = 0
-  L.indent = 0
-  L.spacing = {}
-  setLen(L.literal, 0)
-  L.fNumber = 0.0
-  L.base = base10
-  L.ident = nil
-  when defined(nimpretty):
-    L.commentOffsetA = 0
-    L.commentOffsetB = 0
-
 proc openLexer*(lex: var Lexer, fileIdx: FileIndex, inputstream: PLLStream;
                 cache: IdentCache; config: ConfigRef) =
   openBaseLexer(lex, inputstream)
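With the hand-rolled initializers gone, callers rely on the stdlib's `default` and `reset` (see the hunks below and the other changed files), which produce the same zeroed state without naming each field. A minimal sketch of the two routines on a stand-in type (hypothetical `Tok`, not the compiler's `Token`):

```nim
type Tok = object
  kind: int
  literal: string

var t = Tok(kind: 3, literal: "abc")
reset(t)                  # zeroes every field in place, as fillToken did
assert t.kind == 0 and t.literal == ""

let fresh = default(Tok)  # a zero-initialized value, as initToken produced
assert fresh.kind == 0 and fresh.literal == ""
```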
@@ -798,7 +771,7 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) =
     if mode != normal: tok.tokType = tkRStrLit
     else: tok.tokType = tkStrLit
     while true:
-      var c = L.buf[pos]
+      let c = L.buf[pos]
       if c == '\"':
         if mode != normal and L.buf[pos+1] == '\"':
           inc(pos, 2)
@@ -824,7 +797,7 @@ proc getCharacter(L: var Lexer; tok: var Token) =
   tokenBegin(tok, L.bufpos)
   let startPos = L.bufpos
   inc(L.bufpos) # skip '
-  var c = L.buf[L.bufpos]
+  let c = L.buf[L.bufpos]
   case c
   of '\0'..pred(' '), '\'':
     lexMessage(L, errGenerated, "invalid character literal")
@@ -942,7 +915,7 @@ proc getOperator(L: var Lexer, tok: var Token) =
   tokenBegin(tok, pos)
   var h: Hash = 0
   while true:
-    var c = L.buf[pos]
+    let c = L.buf[pos]
     if c in OpChars:
       h = h !& ord(c)
       inc(pos)
@@ -1010,23 +983,6 @@ proc getPrecedence*(tok: Token): int =
   of tkOr, tkXor, tkPtr, tkRef: result = 3
   else: return -10
 
-proc newlineFollows*(L: Lexer): bool =
-  result = false
-  var pos = L.bufpos
-  while true:
-    case L.buf[pos]
-    of ' ', '\t':
-      inc(pos)
-    of CR, LF:
-      result = true
-      break
-    of '#':
-      inc(pos)
-      if L.buf[pos] == '#': inc(pos)
-      if L.buf[pos] != '[': return true
-    else:
-      break
-
 proc skipMultiLineComment(L: var Lexer; tok: var Token; start: int;
                           isDoc: bool) =
   var pos = start
@@ -1118,9 +1074,7 @@ proc scanComment(L: var Lexer, tok: var Token) =
         toStrip = 0
       else: # found first non-whitespace character
         stripInit = true
-  var lastBackslash = -1
   while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
-    if L.buf[pos] == '\\': lastBackslash = pos+1
     tok.literal.add(L.buf[pos])
     inc(pos)
   tokenEndIgnore(tok, pos)
@@ -1163,7 +1117,7 @@ proc skip(L: var Lexer, tok: var Token) =
       inc(pos)
       tok.spacing.incl(tsLeading)
     of '\t':
-      if not L.allowTabs: lexMessagePos(L, errGenerated, pos, "tabs are not allowed, use spaces instead")
+      lexMessagePos(L, errGenerated, pos, "tabs are not allowed, use spaces instead")
       inc(pos)
     of CR, LF:
       tokenEndPrevious(tok, pos)
@@ -1231,7 +1185,7 @@ proc rawGetTok*(L: var Lexer, tok: var Token) =
     L.previousToken.line = tok.line.uint16
     L.previousToken.col = tok.col.int16
 
-  fillToken(tok)
+  reset(tok)
   if L.indentAhead >= 0:
     tok.indent = L.indentAhead
     L.currLineIndent = L.indentAhead
@@ -1243,7 +1197,7 @@ proc rawGetTok*(L: var Lexer, tok: var Token) =
     if tok.tokType == tkComment:
       L.indentAhead = L.currLineIndent
       return
-  var c = L.buf[L.bufpos]
+  let c = L.buf[L.bufpos]
   tok.line = L.lineNumber
   tok.col = getColNumber(L, L.bufpos)
   if c in SymStartChars - {'r', 'R'} - UnicodeOperatorStartChars:
@@ -1402,7 +1356,6 @@ proc getIndentWidth*(fileIdx: FileIndex, inputstream: PLLStream;
   result = 0
   var lex: Lexer = default(Lexer)
   var tok: Token = default(Token)
-  initToken(tok)
   openLexer(lex, fileIdx, inputstream, cache, config)
   var prevToken = tkEof
   while tok.tokType != tkEof:
@@ -1415,11 +1368,11 @@ proc getIndentWidth*(fileIdx: FileIndex, inputstream: PLLStream;
 proc getPrecedence*(ident: PIdent): int =
   ## assumes ident is binary operator already
-  var tok: Token
-  initToken(tok)
-  tok.ident = ident
-  tok.tokType =
-    if tok.ident.id in ord(tokKeywordLow) - ord(tkSymbol)..ord(tokKeywordHigh) - ord(tkSymbol):
-      TokType(tok.ident.id + ord(tkSymbol))
-    else: tkOpr
+  let
+    tokType =
+      if ident.id in ord(tokKeywordLow) - ord(tkSymbol)..ord(tokKeywordHigh) - ord(tkSymbol):
+        TokType(ident.id + ord(tkSymbol))
+      else: tkOpr
+    tok = Token(ident: ident, tokType: tokType)
   getPrecedence(tok)
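The object-constructor form also explains why no `initToken` call is needed here: fields the constructor does not list are zero-initialized automatically. A minimal sketch of that behavior (hypothetical `Mini` type, not the compiler's `Token`):

```nim
type Mini = object
  id: int
  name: string

let m = Mini(name: "or")  # `id` is unlisted, so it defaults to 0
assert m.id == 0 and m.name == "or"
```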

compiler/main.nim

@@ -224,7 +224,6 @@ proc commandScan(cache: IdentCache, config: ConfigRef) =
     var
       L: Lexer
       tok: Token = default(Token)
-    initToken(tok)
     openLexer(L, f, stream, cache, config)
     while true:
       rawGetTok(L, tok)

compiler/nimconf.nim

@@ -222,7 +222,6 @@ proc readConfigFile*(filename: AbsoluteFile; cache: IdentCache;
     stream: PLLStream
   stream = llStreamOpen(filename, fmRead)
   if stream != nil:
-    initToken(tok)
     openLexer(L, filename, stream, cache, config)
     tok.tokType = tkEof # to avoid a pointless warning
     var condStack: seq[bool] = @[]

compiler/parser.nim

@@ -83,10 +83,6 @@ type
   PrimaryMode = enum
     pmNormal, pmTypeDesc, pmTypeDef, pmTrySimple
 
-proc parseAll*(p: var Parser): PNode
-proc closeParser*(p: var Parser)
-proc parseTopLevelStmt*(p: var Parser): PNode
-
 # helpers for the other parsers
 proc isOperator*(tok: Token): bool
 proc getTok*(p: var Parser)
@@ -144,7 +140,7 @@ proc openParser*(p: var Parser, fileIdx: FileIndex, inputStream: PLLStream,
                  cache: IdentCache; config: ConfigRef) =
   ## Open a parser, using the given arguments to set up its internal state.
   ##
-  initToken(p.tok)
+  reset(p.tok)
   openLexer(p.lex, fileIdx, inputStream, cache, config)
   when defined(nimpretty):
     openEmitter(p.em, cache, config, fileIdx)
@@ -156,7 +152,7 @@ proc openParser*(p: var Parser, filename: AbsoluteFile, inputStream: PLLStream,
                  cache: IdentCache; config: ConfigRef) =
   openParser(p, fileInfoIdx(config, filename), inputStream, cache, config)
 
-proc closeParser(p: var Parser) =
+proc closeParser*(p: var Parser) =
   ## Close a parser, freeing up its resources.
   closeLexer(p.lex)
   when defined(nimpretty):
@@ -2520,7 +2516,7 @@ proc parseStmt(p: var Parser): PNode =
       if err and p.tok.tokType == tkEof: break
   setEndInfo()
 
-proc parseAll(p: var Parser): PNode =
+proc parseAll*(p: var Parser): PNode =
   ## Parses the rest of the input stream held by the parser into a PNode.
   result = newNodeP(nkStmtList, p)
   while p.tok.tokType != tkEof:
@@ -2540,7 +2536,7 @@ proc checkFirstLineIndentation*(p: var Parser) =
   if p.tok.indent != 0 and tsLeading in p.tok.spacing:
     parMessage(p, errInvalidIndentation)
 
-proc parseTopLevelStmt(p: var Parser): PNode =
+proc parseTopLevelStmt*(p: var Parser): PNode =
   ## Implements an iterator which, when called repeatedly, returns the next
   ## top-level statement or emptyNode if end of stream.
   result = p.emptyNode
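These three hunks pair with the forward declarations deleted above: once a proc has no separate forward declaration, the `*` export marker must appear on the definition itself. A minimal sketch of the pattern (toy procs, not parser.nim):

```nim
# With a forward declaration, the marker can live on the declaration
# while the completing definition omits it:
proc double*(x: int): int         # forward declaration, exported
proc double(x: int): int = 2 * x  # definition completes the declaration

# Without a forward declaration, the definition carries the marker:
proc triple*(x: int): int = 3 * x

assert double(2) == 4
assert triple(2) == 6
```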

compiler/renderer.nim

@@ -125,6 +125,8 @@ template outside(g: var TSrcGen, section: Section, body: untyped) =
 const
   IndentWidth = 2
   longIndentWid = IndentWidth * 2
+  MaxLineLen = 80
+  LineCommentColumn = 30
 
 when defined(nimpretty):
   proc minmaxLine(n: PNode): (int, int) =
proc minmaxLine(n: PNode): (int, int) =
@@ -143,10 +145,6 @@ when defined(nimpretty):
   proc lineDiff(a, b: PNode): int =
     result = minmaxLine(b)[0] - minmaxLine(a)[1]
 
-const
-  MaxLineLen = 80
-  LineCommentColumn = 30
-
 proc initSrcGen(renderFlags: TRenderFlags; config: ConfigRef): TSrcGen =
   result = TSrcGen(comStack: @[], tokens: @[], indent: 0,
                    lineLen: 0, pos: 0, idx: 0, buf: "",

tools/grammar_nanny.nim

@@ -22,7 +22,6 @@ proc checkGrammarFileImpl(cache: IdentCache, config: ConfigRef) =
   var
     L: Lexer
     tok: Token
-  initToken(tok)
   openLexer(L, f, stream, cache, config)
   # load the first token:
   rawGetTok(L, tok)