From 2e2d9288465709d84a5d91abbb035583c23efe45 Mon Sep 17 00:00:00 2001 From: Andreas Rumpf Date: Sat, 4 Sep 2021 19:52:57 +0200 Subject: [PATCH] formal grammar updates [backport:1.2] (#18803) --- compiler/parser.nim | 50 +++++++++++++++++++---------------------- doc/grammar.txt | 37 +++++++++++++----------------- tools/grammar_nanny.nim | 1 + 3 files changed, 40 insertions(+), 48 deletions(-) diff --git a/compiler/parser.nim b/compiler/parser.nim index 225493902b..7d3c913dad 100644 --- a/compiler/parser.nim +++ b/compiler/parser.nim @@ -275,12 +275,6 @@ proc isRightAssociative(tok: Token): bool {.inline.} = result = tok.tokType == tkOpr and tok.ident.s[0] == '^' # or (tok.ident.s.len > 1 and tok.ident.s[^1] == '>') -proc isOperator(tok: Token): bool = - ## Determines if the given token is an operator type token. - tok.tokType in {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs, - tkIsnot, tkNot, tkOf, tkAs, tkFrom, tkDotDot, tkAnd, - tkOr, tkXor} - proc isUnary(tok: Token): bool = ## Check if the given token is a unary operator tok.tokType in {tkOpr, tkDotDot} and @@ -323,6 +317,14 @@ proc checkBinary(p: Parser) {.inline.} = #| mulExpr = dollarExpr (OP9 optInd dollarExpr)* #| dollarExpr = primary (OP10 optInd primary)* +proc isOperator(tok: Token): bool = + #| operatorB = OP0 | OP1 | OP2 | OP3 | OP4 | OP5 | OP6 | OP7 | OP8 | OP9 | + #| 'div' | 'mod' | 'shl' | 'shr' | 'in' | 'notin' | + #| 'is' | 'isnot' | 'not' | 'of' | 'as' | 'from' | '..' | 'and' | 'or' | 'xor' + tok.tokType in {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs, + tkIsnot, tkNot, tkOf, tkAs, tkFrom, tkDotDot, tkAnd, + tkOr, tkXor} + proc colcom(p: var Parser, n: PNode) = eat(p, tkColon) skipComment(p, n) @@ -447,8 +449,6 @@ proc exprColonEqExprList(p: var Parser, kind: TNodeKind, exprColonEqExprListAux(p, endTok, result) proc dotExpr(p: var Parser, a: PNode): PNode = - #| dotExpr = expr '.' optInd (symbol | '[:' exprList ']') - #| explicitGenericInstantiation = '[:' exprList ']' ( '(' exprColonEqExpr ')' )? var info = p.parLineInfo getTok(p) result = newNodeI(nkDotExpr, info) @@ -469,7 +469,6 @@ proc dotExpr(p: var Parser, a: PNode): PNode = result = y proc dotLikeExpr(p: var Parser, a: PNode): PNode = - #| dotLikeExpr = expr DOTLIKEOP optInd symbol var info = p.parLineInfo result = newNodeI(nkInfix, info) optInd(p, result) @@ -661,7 +660,7 @@ proc identOrLiteral(p: var Parser, mode: PrimaryMode): PNode = #| | NIL #| generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT #| identOrLiteral = generalizedLit | symbol | literal - #| | par | arrayConstr | setOrTableConstr + #| | par | arrayConstr | setOrTableConstr | tupleConstr #| | castExpr #| tupleConstr = '(' optInd (exprColonEqExpr comma?)* optPar ')' #| arrayConstr = '[' optInd (exprColonEqExpr comma?)* optPar ']' @@ -808,7 +807,7 @@ proc isDotLike(tok: Token): bool = proc primarySuffix(p: var Parser, r: PNode, baseIndent: int, mode: PrimaryMode): PNode = #| primarySuffix = '(' (exprColonEqExpr comma?)* ')' - #| | '.' optInd symbol generalizedLit? + #| | '.' optInd symbol ('[:' exprList ']' ( '(' exprColonEqExpr ')' )?)? generalizedLit? #| | DOTLIKEOP optInd symbol generalizedLit? #| | '[' optInd exprColonEqExprList optPar ']' #| | '{' optInd exprColonEqExprList optPar '}' @@ -1013,11 +1012,9 @@ proc parseIdentColonEquals(p: var Parser, flags: DeclaredIdentFlags): PNode = result.add(newNodeP(nkEmpty, p)) proc parseTuple(p: var Parser, indentAllowed = false): PNode = - #| inlTupleDecl = 'tuple' - #| '[' optInd (identColonEquals (comma/semicolon)?)* optPar ']' - #| extTupleDecl = 'tuple' + #| tupleDecl = 'tuple' + #| '[' optInd (identColonEquals (comma/semicolon)?)* optPar ']' | #| COMMENT? (IND{>} identColonEquals (IND{=} identColonEquals)*)? - #| tupleClass = 'tuple' result = newNodeP(nkTupleTy, p) getTok(p) if p.tok.tokType == tkBracketLe: @@ -1123,7 +1120,7 @@ proc parseDoBlock(p: var Parser; info: TLineInfo): PNode = genericParams = p.emptyNode, pragmas = pragmas, exceptions = p.emptyNode) proc parseProcExpr(p: var Parser; isExpr: bool; kind: TNodeKind): PNode = - #| procExpr = 'proc' paramListColon pragma? ('=' COMMENT? stmt)? + #| routineExpr = ('proc' | 'func' | 'iterator') paramListColon pragma? ('=' COMMENT? stmt)? # either a proc type or a anonymous proc let info = parLineInfo(p) getTok(p) @@ -1165,7 +1162,6 @@ proc parseSymbolList(p: var Parser, result: PNode) = proc parseTypeDescKAux(p: var Parser, kind: TNodeKind, mode: PrimaryMode): PNode = - #| distinct = 'distinct' optInd typeDesc result = newNodeP(kind, p) getTok(p) if p.tok.indent != -1 and p.tok.indent <= p.currInd: return @@ -1254,12 +1250,14 @@ proc parseObject(p: var Parser): PNode proc parseTypeClass(p: var Parser): PNode proc primary(p: var Parser, mode: PrimaryMode): PNode = - #| typeKeyw = 'var' | 'out' | 'ref' | 'ptr' | 'shared' | 'tuple' - #| | 'proc' | 'iterator' | 'distinct' | 'object' | 'enum' - #| primary = typeKeyw optInd typeDesc + #| primary = operatorB primary primarySuffix* | + #| tupleDecl | routineExpr | enumDecl + #| objectDecl | conceptDecl | ('bind' primary) + #| ('var' | 'out' | 'ref' | 'ptr' | 'distinct') primary #| / prefixOperator* identOrLiteral primarySuffix* - #| / 'bind' primary if isOperator(p.tok): + # Note 'sigil like' operators are currently not reflected in the grammar + # and should be removed for Nim 2.0, I don't think anybody uses them. let isSigil = isSigilLike(p.tok) result = newNodeP(nkPrefix, p) var a = newIdentNodeP(p.tok.ident, p) @@ -1342,7 +1340,6 @@ proc parseTypeDesc(p: var Parser): PNode = proc parseTypeDefAux(p: var Parser): PNode = #| typeDefAux = simpleExpr ('not' expr)? - #| | 'concept' typeClass result = simpleExpr(p, pmTypeDef) result = binaryNot(p, result) @@ -1695,7 +1692,6 @@ proc parseTry(p: var Parser; isExpr: bool): PNode = if b == nil: parMessage(p, "expected 'except'") proc parseExceptBlock(p: var Parser, kind: TNodeKind): PNode = - #| exceptBlock = 'except' colcom stmt result = newNodeP(kind, p) getTok(p) colcom(p, result) @@ -1867,7 +1863,7 @@ proc parseSection(p: var Parser, kind: TNodeKind, parMessage(p, errIdentifierExpected, p.tok) proc parseEnum(p: var Parser): PNode = - #| enum = 'enum' optInd (symbol pragma? optInd ('=' optInd expr COMMENT?)? comma?)+ + #| enumDecl = 'enum' optInd (symbol pragma? optInd ('=' optInd expr COMMENT?)? comma?)+ result = newNodeP(nkEnumTy, p) getTok(p) result.add(p.emptyNode) @@ -2015,7 +2011,7 @@ proc parseObjectPart(p: var Parser): PNode = result = p.emptyNode proc parseObject(p: var Parser): PNode = - #| object = 'object' pragma? ('of' typeDesc)? COMMENT? objectPart + #| objectDecl = 'object' pragma? ('of' typeDesc)? COMMENT? objectPart result = newNodeP(nkObjectTy, p) getTok(p) if p.tok.tokType == tkCurlyDotLe and p.validInd: @@ -2057,8 +2053,8 @@ proc parseTypeClassParam(p: var Parser): PNode = result = p.parseSymbol proc parseTypeClass(p: var Parser): PNode = - #| typeClassParam = ('var' | 'out')? symbol - #| typeClass = typeClassParam ^* ',' (pragma)? ('of' typeDesc ^* ',')? + #| conceptParam = ('var' | 'out')? symbol + #| conceptDecl = 'concept' conceptParam ^* ',' (pragma)? ('of' typeDesc ^* ',')? #| &IND{>} stmt result = newNodeP(nkTypeClassTy, p) getTok(p) diff --git a/doc/grammar.txt b/doc/grammar.txt index f58621b97b..8f86dd98c2 100644 --- a/doc/grammar.txt +++ b/doc/grammar.txt @@ -22,14 +22,14 @@ ampExpr = plusExpr (OP7 optInd plusExpr)* plusExpr = mulExpr (OP8 optInd mulExpr)* mulExpr = dollarExpr (OP9 optInd dollarExpr)* dollarExpr = primary (OP10 optInd primary)* +operatorB = OP0 | OP1 | OP2 | OP3 | OP4 | OP5 | OP6 | OP7 | OP8 | OP9 | + 'div' | 'mod' | 'shl' | 'shr' | 'in' | 'notin' | + 'is' | 'isnot' | 'not' | 'of' | 'as' | 'from' | '..' | 'and' | 'or' | 'xor' symbol = '`' (KEYW|IDENT|literal|(operator|'('|')'|'['|']'|'{'|'}'|'=')+)+ '`' | IDENT | KEYW exprColonEqExpr = expr (':'|'=' expr)? exprList = expr ^+ comma exprColonEqExprList = exprColonEqExpr (comma exprColonEqExpr)* (comma)? -dotExpr = expr '.' optInd (symbol | '[:' exprList ']') -explicitGenericInstantiation = '[:' exprList ']' ( '(' exprColonEqExpr ')' )? -dotLikeExpr = expr DOTLIKEOP optInd symbol qualifiedIdent = symbol ('.' optInd symbol)? setOrTableConstr = '{' ((exprColonEqExpr comma)* | ':' ) '}' castExpr = 'cast' ('[' optInd typeDesc optPar ']' '(' optInd expr optPar ')') / @@ -51,12 +51,12 @@ literal = | INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT | NIL generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT identOrLiteral = generalizedLit | symbol | literal - | par | arrayConstr | setOrTableConstr + | par | arrayConstr | setOrTableConstr | tupleConstr | castExpr tupleConstr = '(' optInd (exprColonEqExpr comma?)* optPar ')' arrayConstr = '[' optInd (exprColonEqExpr comma?)* optPar ']' primarySuffix = '(' (exprColonEqExpr comma?)* ')' - | '.' optInd symbol generalizedLit? + | '.' optInd symbol ('[:' exprList ']' ( '(' exprColonEqExpr ')' )?)? generalizedLit? | DOTLIKEOP optInd symbol generalizedLit? | '[' optInd exprColonEqExprList optPar ']' | '{' optInd exprColonEqExprList optPar '}' @@ -70,17 +70,14 @@ declColonEquals = identWithPragma (comma identWithPragma)* comma? (':' optInd typeDesc)? ('=' optInd expr)? identColonEquals = IDENT (comma IDENT)* comma? (':' optInd typeDesc)? ('=' optInd expr)?) -inlTupleDecl = 'tuple' - '[' optInd (identColonEquals (comma/semicolon)?)* optPar ']' -extTupleDecl = 'tuple' +tupleDecl = 'tuple' + '[' optInd (identColonEquals (comma/semicolon)?)* optPar ']' | COMMENT? (IND{>} identColonEquals (IND{=} identColonEquals)*)? -tupleClass = 'tuple' paramList = '(' declColonEquals ^* (comma/semicolon) ')' paramListArrow = paramList? ('->' optInd typeDesc)? paramListColon = paramList? (':' optInd typeDesc)? doBlock = 'do' paramListArrow pragma? colcom stmt -procExpr = 'proc' paramListColon pragma? ('=' COMMENT? stmt)? -distinct = 'distinct' optInd typeDesc +routineExpr = ('proc' | 'func' | 'iterator') paramListColon pragma? ('=' COMMENT? stmt)? forStmt = 'for' (identWithPragma ^+ comma) 'in' expr colcom stmt forExpr = forStmt expr = (blockExpr @@ -90,14 +87,13 @@ expr = (blockExpr | forExpr | tryExpr) / simpleExpr -typeKeyw = 'var' | 'out' | 'ref' | 'ptr' | 'shared' | 'tuple' - | 'proc' | 'iterator' | 'distinct' | 'object' | 'enum' -primary = typeKeyw optInd typeDesc +primary = operatorB primary primarySuffix* | + tupleDecl | routineExpr | enumDecl + objectDecl | conceptDecl | ('bind' primary) + ('var' | 'out' | 'ref' | 'ptr' | 'distinct') primary / prefixOperator* identOrLiteral primarySuffix* - / 'bind' primary typeDesc = simpleExpr ('not' expr)? typeDefAux = simpleExpr ('not' expr)? - | 'concept' typeClass postExprBlocks = ':' stmt? ( IND{=} doBlock | IND{=} 'of' exprList ':' stmt | IND{=} 'elif' expr ':' stmt @@ -147,7 +143,6 @@ tryStmt = 'try' colcom stmt &(IND{=}? 'except'|'finally') tryExpr = 'try' colcom stmt &(optInd 'except'|'finally') (optInd 'except' exprList colcom stmt)* (optInd 'finally' colcom stmt)? -exceptBlock = 'except' colcom stmt blockStmt = 'block' symbol? colcom stmt blockExpr = 'block' symbol? colcom stmt staticStmt = 'static' colcom stmt @@ -162,7 +157,7 @@ routine = optInd identVis pattern? genericParamList? paramListColon pragma? ('=' COMMENT? stmt)? indAndComment commentStmt = COMMENT section(RULE) = COMMENT? RULE / (IND{>} (RULE / COMMENT)^+IND{=} DED) -enum = 'enum' optInd (symbol pragma? optInd ('=' optInd expr COMMENT?)? comma?)+ +enumDecl = 'enum' optInd (symbol pragma? optInd ('=' optInd expr COMMENT?)? comma?)+ objectWhen = 'when' expr colcom objectPart COMMENT? ('elif' expr colcom objectPart COMMENT?)* ('else' colcom objectPart COMMENT?)? @@ -175,9 +170,9 @@ objectCase = 'case' identWithPragma ':' typeDesc ':'? COMMENT? | IND{=} objectBranches) objectPart = IND{>} objectPart^+IND{=} DED / objectWhen / objectCase / 'nil' / 'discard' / declColonEquals -object = 'object' pragma? ('of' typeDesc)? COMMENT? objectPart -typeClassParam = ('var' | 'out')? symbol -typeClass = typeClassParam ^* ',' (pragma)? ('of' typeDesc ^* ',')? +objectDecl = 'object' pragma? ('of' typeDesc)? COMMENT? objectPart +conceptParam = ('var' | 'out')? symbol +conceptDecl = 'concept' conceptParam ^* ',' (pragma)? ('of' typeDesc ^* ',')? &IND{>} stmt typeDef = identWithPragmaDot genericParamList? '=' optInd typeDefAux indAndComment? / identVisDot genericParamList? pragma '=' optInd typeDefAux diff --git a/tools/grammar_nanny.nim b/tools/grammar_nanny.nim index 5120ef9761..502412c3ce 100644 --- a/tools/grammar_nanny.nim +++ b/tools/grammar_nanny.nim @@ -13,6 +13,7 @@ proc checkGrammarFileImpl(cache: IdentCache, config: ConfigRef) = var stream = llStreamOpen(data) var declaredSyms = initHashSet[string]() var usedSyms = initHashSet[string]() + usedSyms.incl "module" # 'module' is the start rule. if stream != nil: declaredSyms.incl "section" # special case for 'section(RULE)' in the grammar var