Adds support for custom ASTs in the Nim parser (#23417)

This commit is contained in:
Andreas Rumpf
2024-03-18 20:27:00 +01:00
committed by GitHub
parent cbf48a253f
commit 6c4c60eade
6 changed files with 449 additions and 245 deletions

View File

@@ -49,9 +49,14 @@ when isMainModule or defined(nimTestGrammar):
checkGrammarFile()
import
llstream, lexer, idents, ast, msgs, options, lineinfos,
llstream, lexer, idents, msgs, options, lineinfos,
pathutils
when not defined(nimCustomAst):
import ast
else:
import plugins / customast
import std/strutils
when defined(nimpretty):
@@ -73,7 +78,8 @@ type
bufposPrevious*: int
inPragma*: int # Pragma level
inSemiStmtList*: int
emptyNode: PNode
when not defined(nimCustomAst):
emptyNode: PNode
when defined(nimpretty):
em*: Emitter
@@ -83,6 +89,10 @@ type
PrimaryMode = enum
pmNormal, pmTypeDesc, pmTypeDef, pmTrySimple
when defined(nimCustomAst):
# For the `customast` version we cannot share nodes, not even empty nodes:
template emptyNode(p: Parser): PNode = newNode(nkEmpty)
# helpers for the other parsers
proc isOperator*(tok: Token): bool
proc getTok*(p: var Parser)
@@ -91,7 +101,7 @@ proc skipComment*(p: var Parser, node: PNode)
proc newNodeP*(kind: TNodeKind, p: Parser): PNode
proc newIntNodeP*(kind: TNodeKind, intVal: BiggestInt, p: Parser): PNode
proc newFloatNodeP*(kind: TNodeKind, floatVal: BiggestFloat, p: Parser): PNode
proc newStrNodeP*(kind: TNodeKind, strVal: string, p: Parser): PNode
proc newStrNodeP*(kind: TNodeKind, strVal: sink string, p: Parser): PNode
proc newIdentNodeP*(ident: PIdent, p: Parser): PNode
proc expectIdentOrKeyw*(p: Parser)
proc expectIdent*(p: Parser)
@@ -146,7 +156,8 @@ proc openParser*(p: var Parser, fileIdx: FileIndex, inputStream: PLLStream,
openEmitter(p.em, cache, config, fileIdx)
getTok(p) # read the first token
p.firstTok = true
p.emptyNode = newNode(nkEmpty)
when not defined(nimCustomAst):
p.emptyNode = newNode(nkEmpty)
proc openParser*(p: var Parser, filename: AbsoluteFile, inputStream: PLLStream,
cache: IdentCache; config: ConfigRef) =
@@ -261,24 +272,20 @@ proc indAndComment(p: var Parser, n: PNode, maybeMissEquals = false) =
skipComment(p, n)
proc newNodeP(kind: TNodeKind, p: Parser): PNode =
result = newNodeI(kind, parLineInfo(p))
result = newNode(kind, parLineInfo(p))
proc newIntNodeP(kind: TNodeKind, intVal: BiggestInt, p: Parser): PNode =
result = newNodeP(kind, p)
result.intVal = intVal
result = newAtom(kind, intVal, parLineInfo(p))
proc newFloatNodeP(kind: TNodeKind, floatVal: BiggestFloat,
p: Parser): PNode =
result = newNodeP(kind, p)
result.floatVal = floatVal
result = newAtom(kind, floatVal, parLineInfo(p))
proc newStrNodeP(kind: TNodeKind, strVal: string, p: Parser): PNode =
result = newNodeP(kind, p)
result.strVal = strVal
proc newStrNodeP(kind: TNodeKind, strVal: sink string, p: Parser): PNode =
result = newAtom(kind, strVal, parLineInfo(p))
proc newIdentNodeP(ident: PIdent, p: Parser): PNode =
result = newNodeP(nkIdent, p)
result.ident = ident
result = newAtom(ident, parLineInfo(p))
proc parseExpr(p: var Parser): PNode
proc parseStmt(p: var Parser): PNode
@@ -381,7 +388,7 @@ proc parseSymbol(p: var Parser, mode = smNormal): PNode =
while true:
case p.tok.tokType
of tkAccent:
if result.len == 0:
if not result.hasSon:
parMessage(p, errIdentifierExpected, p.tok)
break
of tkOpr, tkDot, tkDotDot, tkEquals, tkParLe..tkParDotRi:
@@ -391,8 +398,7 @@ proc parseSymbol(p: var Parser, mode = smNormal): PNode =
tkParLe..tkParDotRi}:
accm.add($p.tok)
getTok(p)
let node = newNodeI(nkIdent, lineinfo)
node.ident = p.lex.cache.getIdent(accm)
let node = newAtom(p.lex.cache.getIdent(accm), lineinfo)
result.add(node)
of tokKeywordLow..tokKeywordHigh, tkSymbol, tkIntLit..tkCustomLit:
result.add(newIdentNodeP(p.lex.cache.getIdent($p.tok), p))
@@ -509,26 +515,26 @@ proc exprColonEqExprList(p: var Parser, kind: TNodeKind,
proc dotExpr(p: var Parser, a: PNode): PNode =
var info = p.parLineInfo
getTok(p)
result = newNodeI(nkDotExpr, info)
result = newNode(nkDotExpr, info)
optInd(p, result)
result.add(a)
result.add(parseSymbol(p, smAfterDot))
if p.tok.tokType == tkBracketLeColon and tsLeading notin p.tok.spacing:
var x = newNodeI(nkBracketExpr, p.parLineInfo)
var x = newNode(nkBracketExpr, p.parLineInfo)
# rewrite 'x.y[:z]()' to 'y[z](x)'
x.add result[1]
x.add result.secondSon
exprList(p, tkBracketRi, x)
eat(p, tkBracketRi)
var y = newNodeI(nkCall, p.parLineInfo)
var y = newNode(nkCall, p.parLineInfo)
y.add x
y.add result[0]
y.add result.firstSon
if p.tok.tokType == tkParLe and tsLeading notin p.tok.spacing:
exprColonEqExprListAux(p, tkParRi, y)
result = y
proc dotLikeExpr(p: var Parser, a: PNode): PNode =
var info = p.parLineInfo
result = newNodeI(nkInfix, info)
result = newNode(nkInfix, info)
optInd(p, result)
var opNode = newIdentNodeP(p.tok.ident, p)
getTok(p)
@@ -584,12 +590,18 @@ proc parseCast(p: var Parser): PNode =
eat(p, tkParRi)
setEndInfo()
template setNodeFlag(n: PNode; f: untyped) =
when defined(nimCustomAst):
discard
else:
incl n.flags, f
proc setBaseFlags(n: PNode, base: NumericalBase) =
case base
of base10: discard
of base2: incl(n.flags, nfBase2)
of base8: incl(n.flags, nfBase8)
of base16: incl(n.flags, nfBase16)
of base2: setNodeFlag(n, nfBase2)
of base8: setNodeFlag(n, nfBase8)
of base16: setNodeFlag(n, nfBase16)
proc parseGStrLit(p: var Parser, a: PNode): PNode =
case p.tok.tokType
@@ -885,7 +897,7 @@ proc primarySuffix(p: var Parser, r: PNode,
result = commandExpr(p, result, mode)
break
result = namedParams(p, result, nkCall, tkParRi)
if result.len > 1 and result[1].kind == nkExprColonExpr:
if result.has2Sons and result.secondSon.kind == nkExprColonExpr:
result.transitionSonsKind(nkObjConstr)
of tkDot:
# progress guaranteed
@@ -1156,7 +1168,7 @@ proc parseParamList(p: var Parser, retColon = true): PNode =
if hasRet and p.tok.indent < 0:
getTok(p)
optInd(p, result)
result[0] = parseTypeDesc(p)
result.replaceFirstSon parseTypeDesc(p)
elif not retColon and not hasParLe:
# Mark as "not there" in order to mark for deprecation in the semantic pass:
result = p.emptyNode
@@ -1201,9 +1213,9 @@ proc parseProcExpr(p: var Parser; isExpr: bool; kind: TNodeKind): PNode =
params = params, name = p.emptyNode, pattern = p.emptyNode,
genericParams = p.emptyNode, pragmas = pragmas, exceptions = p.emptyNode)
skipComment(p, result)
result[bodyPos] = parseStmt(p)
result.replaceSon bodyPos, parseStmt(p)
else:
result = newNodeI(if kind == nkIteratorDef: nkIteratorTy else: nkProcTy, info)
result = newNode(if kind == nkIteratorDef: nkIteratorTy else: nkProcTy, info)
if hasSignature or pragmas.kind != nkEmpty:
if hasSignature:
result.add(params)
@@ -1316,7 +1328,7 @@ proc parseExpr(p: var Parser): PNode =
result = parseFor(p)
of tkWhen:
nimprettyDontTouch:
result = parseIfOrWhenExpr(p, nkWhenExpr)
result = parseIfOrWhenExpr(p, nkWhenStmt)
of tkCase:
# Currently we think nimpretty is good enough with case expressions,
# so it is allowed to touch them:
@@ -1475,7 +1487,7 @@ proc makeCall(n: PNode): PNode =
if n.kind in nkCallKinds:
result = n
else:
result = newNodeI(nkCall, n.info)
result = newNode(nkCall, n.info)
result.add n
proc postExprBlocks(p: var Parser, x: PNode): PNode =
@@ -1506,9 +1518,9 @@ proc postExprBlocks(p: var Parser, x: PNode): PNode =
var stmtList = newNodeP(nkStmtList, p)
stmtList.add parseStmt(p)
# to keep backwards compatibility (see tests/vm/tstringnil)
if stmtList[0].kind == nkStmtList: stmtList = stmtList[0]
if stmtList.firstSon.kind == nkStmtList: stmtList = stmtList.firstSon
stmtList.flags.incl nfBlockArg
setNodeFlag stmtList, nfBlockArg
if openingParams.kind != nkEmpty or openingPragmas.kind != nkEmpty:
if openingParams.kind == nkEmpty:
openingParams = newNodeP(nkFormalParams, p)
@@ -1552,7 +1564,7 @@ proc postExprBlocks(p: var Parser, x: PNode): PNode =
eat(p, tkColon)
nextBlock.add parseStmt(p)
nextBlock.flags.incl nfBlockArg
setNodeFlag nextBlock, nfBlockArg
result.add nextBlock
if nextBlock.kind in {nkElse, nkFinally}: break
@@ -1578,7 +1590,7 @@ proc parseExprStmt(p: var Parser): PNode =
# if an expression is starting here, a simplePrimary was parsed and
# this is the start of a command
if p.tok.indent < 0 and isExprStart(p):
result = newTreeI(nkCommand, a.info, a)
result = newTree(nkCommand, a.info, a)
let baseIndent = p.currInd
while true:
result.add(commandParam(p, isFirstParam, pmNormal))
@@ -1969,13 +1981,13 @@ proc parseRoutine(p: var Parser, kind: TNodeKind): PNode =
else:
result.add(p.emptyNode)
indAndComment(p, result, maybeMissEquals)
let body = result[^1]
if body.kind == nkStmtList and body.len > 0 and body[0].comment.len > 0 and body[0].kind != nkCommentStmt:
let body = result.lastSon
if body.kind == nkStmtList and body.hasSon and body.firstSon.comment.len > 0 and body.firstSon.kind != nkCommentStmt:
if result.comment.len == 0:
# proc fn*(a: int): int = a ## foo
# => moves comment `foo` to `fn`
result.comment = body[0].comment
body[0].comment = ""
result.comment = body.firstSon.comment
body.firstSon.comment = ""
#else:
# assert false, p.lex.config$body.info # avoids hard to track bugs, fail early.
# Yeah, that worked so well. There IS a bug in this logic, now what?
@@ -2009,7 +2021,7 @@ proc parseSection(p: var Parser, kind: TNodeKind,
else:
parMessage(p, errIdentifierExpected, p.tok)
break
if result.len == 0: parMessage(p, errIdentifierExpected, p.tok)
if not result.hasSon: parMessage(p, errIdentifierExpected, p.tok)
elif p.tok.tokType in {tkSymbol, tkAccent, tkParLe} and p.tok.indent < 0:
# tkParLe is allowed for ``var (x, y) = ...`` tuple parsing
result.add(defparser(p))
@@ -2060,7 +2072,7 @@ proc parseEnum(p: var Parser): PNode =
if p.tok.indent >= 0 and p.tok.indent <= p.currInd or
p.tok.tokType == tkEof:
break
if result.len <= 1:
if not result.has2Sons:
parMessage(p, errIdentifierExpected, p.tok)
setEndInfo()
@@ -2320,7 +2332,7 @@ proc parseVariable(p: var Parser): PNode =
optInd(p, result)
result.add(parseExpr(p))
else: result = parseIdentColonEquals(p, {withPragma, withDot})
result[^1] = postExprBlocks(p, result[^1])
result.setLastSon postExprBlocks(p, result.lastSon)
indAndComment(p, result)
setEndInfo()
@@ -2339,8 +2351,8 @@ proc parseConstant(p: var Parser): PNode =
eat(p, tkEquals)
optInd(p, result)
#add(result, parseStmtListExpr(p))
result.add(parseExpr(p))
result[^1] = postExprBlocks(p, result[^1])
let a = parseExpr(p)
result.add postExprBlocks(p, a)
indAndComment(p, result)
setEndInfo()
@@ -2365,7 +2377,7 @@ proc parseStmtPragma(p: var Parser): PNode =
result = parsePragma(p)
if p.tok.tokType == tkColon and p.tok.indent < 0:
let a = result
result = newNodeI(nkPragmaBlock, a.info)
result = newNode(nkPragmaBlock, a.info)
getTok(p)
skipComment(p, result)
result.add a