new parsing scheme is documented

This commit is contained in:
Araq
2013-04-21 20:09:46 +02:00
parent 03764f0aba
commit 8a595b631b
4 changed files with 293 additions and 455 deletions

View File

@@ -30,7 +30,8 @@ import
type
TParser*{.final.} = object # a TParser object represents a module that
# is being parsed
currInd: int # current indentation (for skipInd)
currInd: int # current indentation
firstTok: bool
lex*: TLexer # the lexer that is used for parsing
tok*: TToken # the current token
@@ -78,6 +79,7 @@ proc OpenParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream) =
initToken(p.tok)
OpenLexer(p.lex, fileIdx, inputstream)
getTok(p) # read the first token
p.firstTok = true
proc OpenParser*(p: var TParser, filename: string, inputStream: PLLStream) =
openParser(p, filename.fileInfoIdx, inputStream)
@@ -129,11 +131,6 @@ proc getTokNoInd(p: var TParser) =
getTok(p)
if p.tok.indent >= 0: parMessage(p, errInvalidIndentation)
when false:
proc ExpectNl(p: TParser) =
if p.tok.tokType notin {tkEof, tkInd, tkComment}:
lexMessage(p.lex, errNewlineExpected, prettyTok(p.tok))
proc expectIdentOrKeyw(p: TParser) =
if p.tok.tokType != tkSymbol and not isKeyword(p.tok.tokType):
lexMessage(p.lex, errIdentifierExpected, prettyTok(p.tok))
@@ -225,9 +222,9 @@ proc isOperator(tok: TToken): bool =
#| module = stmt ^* (';' / IND{=})
#|
#| comma = ',' COMMENT? IND?
#| semicolon = ';' COMMENT IND?
#| colon = ':' COMMENT? IND?
#| comma = ',' COMMENT?
#| semicolon = ';' COMMENT?
#| colon = ':' COMMENT?
#| colcom = ':' COMMENT?
#|
#| operator = OP0 | OP1 | OP2 | OP3 | OP4 | OP5 | OP6 | OP7 | OP8 | OP9
@@ -237,10 +234,10 @@ proc isOperator(tok: TToken): bool =
#|
#| prefixOperator = operator
#|
#| optInd = COMMENT? IND?
#| optPar = IND{>} | IND{=}
#| optInd = COMMENT?
#| optPar = (IND{>} | IND{=})?
#|
#| lowestExpr = assignExpr (OP0 optInd assignExpr)*
#| simpleExpr = assignExpr (OP0 optInd assignExpr)*
#| assignExpr = orExpr (OP1 optInd orExpr)*
#| orExpr = andExpr (OP2 optInd andExpr)*
#| andExpr = cmpExpr (OP3 optInd cmpExpr)*
@@ -591,7 +588,7 @@ type
proc primary(p: var TParser, mode: TPrimaryMode): PNode
proc lowestExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode =
proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode =
result = primary(p, mode)
# expand while operators have priorities higher than 'limit'
var opPrec = getPrecedence(p.tok)
@@ -604,15 +601,15 @@ proc lowestExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode =
getTok(p)
optInd(p, opNode)
# read sub-expression with higher priority:
var b = lowestExprAux(p, opPrec + leftAssoc, modeB)
var b = simpleExprAux(p, opPrec + leftAssoc, modeB)
addSon(a, opNode)
addSon(a, result)
addSon(a, b)
result = a
opPrec = getPrecedence(p.tok)
proc lowestExpr(p: var TParser, mode = pmNormal): PNode =
result = lowestExprAux(p, -1, mode)
proc simpleExpr(p: var TParser, mode = pmNormal): PNode =
result = simpleExprAux(p, -1, mode)
proc parseIfExpr(p: var TParser, kind: TNodeKind): PNode =
#| condExpr = expr colcom expr optInd
@@ -844,12 +841,12 @@ proc parseExpr(p: var TParser): PNode =
#| expr = (ifExpr
#| | whenExpr
#| | caseExpr)
#| / lowestExpr
#| / simpleExpr
case p.tok.tokType:
of tkIf: result = parseIfExpr(p, nkIfExpr)
of tkWhen: result = parseIfExpr(p, nkWhenExpr)
of tkCase: result = parseCase(p)
else: result = lowestExpr(p)
else: result = simpleExpr(p)
# XXX needs proper support:
#of tkTry: result = parseTry(p)
@@ -934,12 +931,12 @@ proc primary(p: var TParser, mode: TPrimaryMode): PNode =
result = primarySuffix(p, result)
proc parseTypeDesc(p: var TParser): PNode =
#| typeDesc = lowestExpr
result = lowestExpr(p, pmTypeDesc)
#| typeDesc = simpleExpr
result = simpleExpr(p, pmTypeDesc)
proc parseTypeDefAux(p: var TParser): PNode =
#| typeDefAux = lowestExpr
result = lowestExpr(p, pmTypeDef)
#| typeDefAux = simpleExpr
result = simpleExpr(p, pmTypeDef)
proc makeCall(n: PNode): PNode =
if n.kind in nkCallKinds:
@@ -949,7 +946,7 @@ proc makeCall(n: PNode): PNode =
result.add n
proc parseExprStmt(p: var TParser): PNode =
#| exprStmt = lowestExpr
#| exprStmt = simpleExpr
#| (( '=' optInd expr )
#| / ( expr ^+ comma
#| doBlocks
@@ -958,7 +955,7 @@ proc parseExprStmt(p: var TParser): PNode =
#| | IND{=} 'except' exprList ':' stmt
#| | IND{=} 'else' ':' stmt )*
#| ))?
var a = lowestExpr(p)
var a = simpleExpr(p)
if p.tok.tokType == tkEquals:
getTok(p)
optInd(p, result)
@@ -1131,7 +1128,7 @@ proc parseCase(p: var TParser): PNode =
#| (IND{=} 'elif' expr colcom stmt)*
#| (IND{=} 'else' colcom stmt)?
#| caseStmt = 'case' expr ':'? COMMENT?
#| (IND{>} ofBranches
#| (IND{>} ofBranches DED
#| | IND{=} ofBranches)
var
b: PNode
@@ -1447,7 +1444,7 @@ proc parseObjectCase(p: var TParser): PNode =
#| (IND{=} 'elif' expr colcom objectPart)*
#| (IND{=} 'else' colcom objectPart)?
#| objectCase = 'case' identWithPragma ':' typeDesc ':'? COMMENT?
#| (IND{>} objectBranches
#| (IND{>} objectBranches DED
#| | IND{=} objectBranches)
result = newNodeP(nkRecCase, p)
getTokNoInd(p)
@@ -1724,7 +1721,10 @@ proc parseAll(p: var TParser): PNode =
proc parseTopLevelStmt(p: var TParser): PNode =
result = ast.emptyNode
while true:
if p.tok.indent > 0: parMessage(p, errInvalidIndentation)
if p.tok.indent != 0:
if p.firstTok and p.tok.indent < 0: nil
else: parMessage(p, errInvalidIndentation)
p.firstTok = false
case p.tok.tokType
of tkSemicolon: getTok(p)
of tkEof: break

View File

@@ -1,204 +1,181 @@
module ::= ([COMMENT] [SAD] stmt)*
module = stmt ^* (';' / IND{=})
comma = ',' COMMENT?
semicolon = ';' COMMENT?
colon = ':' COMMENT?
colcom = ':' COMMENT?
comma ::= ',' [COMMENT] [IND]
semicolon ::= ';' [COMMENT] [IND]
operator = OP0 | OP1 | OP2 | OP3 | OP4 | OP5 | OP6 | OP7 | OP8 | OP9
| 'or' | 'xor' | 'and'
| 'is' | 'isnot' | 'in' | 'notin' | 'of'
| 'div' | 'mod' | 'shl' | 'shr' | 'not' | 'addr' | 'static' | '..'
operator ::= OP0 | OP1 | OP2 | OP3 | OP4 | OP5 | OP6 | OP7 | OP8 | OP9
| 'or' | 'xor' | 'and'
| 'is' | 'isnot' | 'in' | 'notin' | 'of'
| 'div' | 'mod' | 'shl' | 'shr' | 'not' | 'addr' | 'static' | '..'
prefixOperator = operator
prefixOperator ::= operator
optInd ::= [COMMENT] [IND]
optPar ::= [IND] | [SAD]
lowestExpr ::= assignExpr (OP0 optInd assignExpr)*
assignExpr ::= orExpr (OP1 optInd orExpr)*
orExpr ::= andExpr (OP2 optInd andExpr)*
andExpr ::= cmpExpr (OP3 optInd cmpExpr)*
cmpExpr ::= sliceExpr (OP4 optInd sliceExpr)*
sliceExpr ::= ampExpr (OP5 optInd ampExpr)*
ampExpr ::= plusExpr (OP6 optInd plusExpr)*
plusExpr ::= mulExpr (OP7 optInd mulExpr)*
mulExpr ::= dollarExpr (OP8 optInd dollarExpr)*
dollarExpr ::= primary (OP9 optInd primary)*
indexExpr ::= expr
castExpr ::= 'cast' '[' optInd typeDesc optPar ']' '(' optInd expr optPar ')'
symbol ::= '`' (KEYWORD | IDENT | operator | '(' ')' | '[' ']' | '{' '}'
| '=' | literal)+ '`'
| IDENT
primaryPrefix ::= (prefixOperator | 'bind') optInd
primarySuffix ::= '.' optInd symbol [generalizedLit]
| '(' optInd namedExprList optPar ')'
| '[' optInd [indexExpr (comma indexExpr)* [comma]] optPar ']'
| '{' optInd [indexExpr (comma indexExpr)* [comma]] optPar '}'
primary ::= primaryPrefix* (symbol [generalizedLit] |
constructor | castExpr)
primarySuffix*
optInd = COMMENT?
optPar = (IND{>} | IND{=})?
simpleExpr = assignExpr (OP0 optInd assignExpr)*
assignExpr = orExpr (OP1 optInd orExpr)*
orExpr = andExpr (OP2 optInd andExpr)*
andExpr = cmpExpr (OP3 optInd cmpExpr)*
cmpExpr = sliceExpr (OP4 optInd sliceExpr)*
sliceExpr = ampExpr (OP5 optInd ampExpr)*
ampExpr = plusExpr (OP6 optInd plusExpr)*
plusExpr = mulExpr (OP7 optInd mulExpr)*
mulExpr = dollarExpr (OP8 optInd dollarExpr)*
dollarExpr = primary (OP9 optInd primary)*
symbol = '`' (KEYW|IDENT|operator|'(' ')'|'[' ']'|'{' '}'|'='|literal)+ '`'
| IDENT
indexExpr = expr
indexExprList = indexExpr ^+ comma
exprColonEqExpr = expr (':'|'=' expr)?
exprList = expr ^+ comma
dotExpr = expr '.' optInd ('type' | 'addr' | symbol)
qualifiedIdent = symbol ('.' optInd ('type' | 'addr' | symbol))?
exprColonEqExprList = exprColonEqExpr (comma exprColonEqExpr)* (comma)?
setOrTableConstr = '{' ((exprColonEqExpr comma)* | ':' ) '}'
castExpr = 'cast' '[' optInd typeDesc optPar ']' '(' optInd expr optPar ')'
generalizedLit ::= GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
literal ::= INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT
| UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
| FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
| STR_LIT | RSTR_LIT | TRIPLESTR_LIT
| CHAR_LIT
| NIL
constructor ::= literal
| '[' optInd colonExprList optPar ']'
| '{' optInd ':' | colonExprList optPar '}'
| '(' optInd colonExprList optPar ')'
colonExpr ::= expr [':' expr]
colonExprList ::= [colonExpr (comma colonExpr)* [comma]]
namedExpr ::= expr ['=' expr]
namedExprList ::= [namedExpr (comma namedExpr)* [comma]]
exprOrType ::= lowestExpr
| 'if' expr ':' expr ('elif' expr ':' expr)* 'else' ':' expr
| 'var' exprOrType
| 'ref' exprOrType
| 'ptr' exprOrType
| 'type' exprOrType
| 'tuple' tupleDesc
expr ::= exprOrType
| 'proc' paramList [pragma] ['=' stmt]
| 'iterator' paramList [pragma] ['=' stmt]
exprList ::= [expr (comma expr)* [comma]]
qualifiedIdent ::= symbol ['.' symbol]
typeDesc ::= (exprOrType
| 'proc' paramList [pragma]
| 'iterator' paramList [pragma] )
['not' expr] # for now only 'not nil' suffix is supported
macroStmt ::= ':' [stmt] ('of' [exprList] ':' stmt
|'elif' expr ':' stmt
|'except' exceptList ':' stmt )*
['else' ':' stmt]
pragmaBlock ::= pragma [':' stmt]
simpleStmt ::= returnStmt
| yieldStmt
| discardStmt
| raiseStmt
| breakStmt
| continueStmt
| pragmaBlock
| importStmt
| fromStmt
| includeStmt
| exprStmt
complexStmt ::= ifStmt | whileStmt | caseStmt | tryStmt | forStmt
| blockStmt | staticStmt | asmStmt
| procDecl | iteratorDecl | macroDecl | templateDecl | methodDecl
| constSection | letSection | varSection
| typeSection | whenStmt | bindStmt
indPush ::= IND # and push indentation onto the stack
indPop ::= # pop indentation from the stack
stmt ::= simpleStmt [SAD]
| indPush (complexStmt | simpleStmt)
([SAD] (complexStmt | simpleStmt))*
DED indPop
exprStmt ::= lowestExpr ['=' expr | [expr (comma expr)*] [macroStmt]]
returnStmt ::= 'return' [expr]
yieldStmt ::= 'yield' expr
discardStmt ::= 'discard' expr
raiseStmt ::= 'raise' [expr]
breakStmt ::= 'break' [symbol]
continueStmt ::= 'continue'
ifStmt ::= 'if' expr ':' stmt ('elif' expr ':' stmt)* ['else' ':' stmt]
whenStmt ::= 'when' expr ':' stmt ('elif' expr ':' stmt)* ['else' ':' stmt]
caseStmt ::= 'case' expr [':'] ('of' exprList ':' stmt)*
('elif' expr ':' stmt)*
['else' ':' stmt]
whileStmt ::= 'while' expr ':' stmt
forStmt ::= 'for' symbol (comma symbol)* 'in' expr ':' stmt
exceptList ::= [qualifiedIdent (comma qualifiedIdent)*]
tryStmt ::= 'try' ':' stmt
('except' exceptList ':' stmt)*
['finally' ':' stmt]
asmStmt ::= 'asm' [pragma] (STR_LIT | RSTR_LIT | TRIPLESTR_LIT)
blockStmt ::= 'block' [symbol] ':' stmt
staticStmt ::= 'static' ':' stmt
filename ::= symbol | STR_LIT | RSTR_LIT | TRIPLESTR_LIT
importStmt ::= 'import' filename (comma filename)*
includeStmt ::= 'include' filename (comma filename)*
bindStmt ::= 'bind' qualifiedIdent (comma qualifiedIdent)*
fromStmt ::= 'from' filename 'import' symbol (comma symbol)*
pragma ::= '{.' optInd (colonExpr [comma])* optPar ('.}' | '}')
param ::= symbol (comma symbol)* (':' typeDesc ['=' expr] | '=' expr)
paramList ::= ['(' [param (comma|semicolon param)*] optPar ')'] [':' typeDesc]
genericConstraint ::= 'object' | 'tuple' | 'enum' | 'proc' | 'ref' | 'ptr'
| 'var' | 'distinct' | 'iterator' | primary
genericConstraints ::= genericConstraint ( '|' optInd genericConstraint )*
genericParam ::= symbol [':' genericConstraints] ['=' expr]
genericParams ::= '[' genericParam (comma|semicolon genericParam)* optPar ']'
routineDecl := symbol ['*'] [genericParams] paramList [pragma] ['=' stmt]
procDecl ::= 'proc' routineDecl
macroDecl ::= 'macro' routineDecl
iteratorDecl ::= 'iterator' routineDecl
templateDecl ::= 'template' routineDecl
methodDecl ::= 'method' routineDecl
colonAndEquals ::= [':' typeDesc] '=' expr
constDecl ::= symbol ['*'] [pragma] colonAndEquals [COMMENT | IND COMMENT]
| COMMENT
constSection ::= 'const' indPush constDecl (SAD constDecl)* DED indPop
letSection ::= 'let' indPush constDecl (SAD constDecl)* DED indPop
typeDef ::= typeDesc | objectDef | enumDef | 'distinct' typeDesc
objectField ::= symbol ['*'] [pragma]
objectIdentPart ::= objectField (comma objectField)* ':' typeDesc
[COMMENT|IND COMMENT]
objectWhen ::= 'when' expr ':' [COMMENT] objectPart
('elif' expr ':' [COMMENT] objectPart)*
['else' ':' [COMMENT] objectPart]
objectCase ::= 'case' expr ':' typeDesc [COMMENT]
('of' exprList ':' [COMMENT] objectPart)*
['else' ':' [COMMENT] objectPart]
objectPart ::= objectWhen | objectCase | objectIdentPart | 'nil'
| indPush objectPart (SAD objectPart)* DED indPop
tupleDesc ::= '[' optInd [param (comma|semicolon param)*] optPar ']'
objectDef ::= 'object' [pragma] ['of' typeDesc] objectPart
enumField ::= symbol ['=' expr]
enumDef ::= 'enum' (enumField [comma] [COMMENT | IND COMMENT])+
typeDecl ::= COMMENT
| symbol ['*'] [genericParams] ['=' typeDef] [COMMENT | IND COMMENT]
typeSection ::= 'type' indPush typeDecl (SAD typeDecl)* DED indPop
colonOrEquals ::= ':' typeDesc ['=' expr] | '=' expr
varField ::= symbol ['*'] [pragma]
varPart ::= symbol (comma symbol)* colonOrEquals [COMMENT | IND COMMENT]
varSection ::= 'var' (varPart
| indPush (COMMENT|varPart)
(SAD (COMMENT|varPart))* DED indPop)
identOrLiteral = generalizedLit | symbol
| INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT
| UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
| FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
| STR_LIT | RSTR_LIT | TRIPLESTR_LIT
| CHAR_LIT
| NIL
| tupleConstr | arrayConstr | setOrTableConstr
| castExpr
tupleConstr = '(' optInd (exprColonEqExpr comma?)* optPar ')'
arrayConstr = '[' optInd (exprColonEqExpr comma?)* optPar ']'
primarySuffix = '(' (exprColonEqExpr comma?)* ')' doBlocks?
| doBlocks
| '.' optInd ('type' | 'addr' | symbol) generalizedLit?
| '[' optInd indexExprList optPar ']'
| '{' optInd indexExprList optPar '}'
condExpr = expr colcom expr optInd
('elif' expr colcom expr optInd)*
'else' colcom expr
ifExpr = 'if' condExpr
whenExpr = 'when' condExpr
pragma = '{.' optInd (exprColonEqExpr comma?)* optPar ('.}' | '}')
identVis = symbol opr? # postfix position
identWithPragma = identVis pragma?
declColonEquals = identWithPragma (comma identWithPragma)* comma?
(':' optInd typeDesc)? ('=' optInd expr)?
identColonEquals = ident (comma ident)* comma?
(':' optInd typeDesc)? ('=' optInd expr)?
inlTupleDecl = 'tuple'
'[' optInd (identColonEquals (comma/semicolon)?)* optPar ']'
extTupleDecl = 'tuple'
COMMENT? (IND{>} identColonEquals (IND{=} identColonEquals)*)?
paramList = '(' identColonEquals ^* (comma/semicolon) ')'
paramListArrow = paramList? ('->' optInd typeDesc)?
paramListColon = paramList? (':' optInd typeDesc)?
doBlock = 'do' paramListArrow pragmas? colcom stmt
doBlocks = doBlock ^* IND{=}
procExpr = 'proc' paramListColon pragmas? ('=' COMMENT? stmt)?
expr = (ifExpr
| whenExpr
| caseExpr)
/ simpleExpr
typeKeyw = 'var' | 'ref' | 'ptr' | 'shared' | 'type' | 'tuple'
| 'proc' | 'iterator' | 'distinct' | 'object' | 'enum'
primary = typeKeyw typeDescK
/ prefixOperator* identOrLiteral primarySuffix*
/ 'addr' primary
/ 'static' primary
/ 'bind' primary
typeDesc = simpleExpr
typeDefAux = simpleExpr
exprStmt = simpleExpr
(( '=' optInd expr )
/ ( expr ^+ comma
doBlocks
/ ':' stmt? ( IND{=} 'of' exprList ':' stmt
| IND{=} 'elif' expr ':' stmt
| IND{=} 'except' exprList ':' stmt
| IND{=} 'else' ':' stmt )*
))?
importStmt = 'import' optInd expr
((comma expr)*
/ 'except' optInd (expr ^+ comma))
includeStmt = 'include' optInd expr ^+ comma
fromStmt = 'from' expr 'import' optInd expr (comma expr)*
returnStmt = 'return' optInd expr?
raiseStmt = 'raise' optInd expr?
yieldStmt = 'yield' optInd expr?
discardStmt = 'discard' optInd expr?
breakStmt = 'break' optInd expr?
continueStmt = 'continue' optInd expr?
condStmt = expr colcom stmt COMMENT?
(IND{=} 'elif' expr colcom stmt)*
(IND{=} 'else' colcom stmt)?
ifStmt = 'if' condStmt
whenStmt = 'when' condStmt
whileStmt = 'while' expr colcom stmt
ofBranch = 'of' exprList colcom stmt
ofBranches = ofBranch (IND{=} ofBranch)*
(IND{=} 'elif' expr colcom stmt)*
(IND{=} 'else' colcom stmt)?
caseStmt = 'case' expr ':'? COMMENT?
(IND{>} ofBranches DED
| IND{=} ofBranches)
tryStmt = 'try' colcom stmt &(IND{=}? 'except'|'finally')
(IND{=}? 'except' exprList colcom stmt)*
(IND{=}? 'finally' colcom stmt)?
exceptBlock = 'except' colcom stmt
forStmt = 'for' symbol (comma symbol)* 'in' expr colcom stmt
blockStmt = 'block' symbol? colcom stmt
staticStmt = 'static' colcom stmt
asmStmt = 'asm' pragma? (STR_LIT | RSTR_LIT | TRIPLESTR_LIT)
genericParam = symbol (comma symbol)* (colon expr)? ('=' optInd expr)?
genericParamList = '[' optInd
genericParam ^* (comma/semicolon) optPar ']'
pattern = '{' stmt '}'
indAndComment = (IND{>} COMMENT)? | COMMENT?
routine = optInd identVis pattern? genericParamList?
paramListColon pragma? ('=' COMMENT? stmt)? indAndComment
commentStmt = COMMENT
section(p) = COMMENT? p / (IND{>} (p / COMMENT)^+IND{=} DED)
constant = identWithPragma (colon typeDesc)? '=' optInd expr indAndComment
enum = 'enum' optInd (symbol optInd ('=' optInd expr COMMENT?)? comma?)+
objectWhen = 'when' expr colcom objectPart COMMENT?
('elif' expr colcom objectPart COMMENT?)*
('else' colcom objectPart COMMENT?)?
objectBranch = 'of' exprList colcom objectPart
objectBranches = objectBranch (IND{=} objectBranch)*
(IND{=} 'elif' expr colcom objectPart)*
(IND{=} 'else' colcom objectPart)?
objectCase = 'case' identWithPragma ':' typeDesc ':'? COMMENT?
(IND{>} objectBranches DED
| IND{=} objectBranches)
objectPart = IND{>} objectPart^+IND{=} DED
/ objectWhen / objectCase / 'nil' / declColonEquals
object = 'object' pragma? ('of' typeDesc)? COMMENT? objectPart
distinct = 'distinct' optInd typeDesc
typeDef = identWithPragma genericParamList? '=' optInd typeDefAux
indAndComment?
varTuple = '(' optInd identWithPragma ^+ comma optPar ')' '=' optInd expr
variable = (varTuple / identColonEquals) indAndComment
bindStmt = 'bind' optInd qualifiedIdent ^+ comma
mixinStmt = 'mixin' optInd qualifiedIdent ^+ comma
pragmaStmt = pragma (':' COMMENT? stmt)?
simpleStmt = ((returnStmt | raiseStmt | yieldStmt | discardStmt | breakStmt
| continueStmt | pragmaStmt | importStmt | exportStmt | fromStmt
| includeStmt | commentStmt) / exprStmt) COMMENT?
complexOrSimpleStmt = (ifStmt | whenStmt | whileStmt
| tryStmt | finallyStmt | exceptStmt | forStmt
| blockStmt | staticStmt | asmStmt
| 'proc' routine
| 'method' routine
| 'iterator' routine
| 'macro' routine
| 'template' routine
| 'converter' routine
| 'type' section(typeDef)
| 'const' section(constant)
| ('let' | 'var') section(variable)
| bindStmt | mixinStmt)
/ simpleStmt
stmt = (IND{>} complexOrSimpleStmt^+(IND{=} / ';') DED)
/ simpleStmt

View File

@@ -23,14 +23,25 @@ This document describes the lexis, the syntax, and the semantics of Nimrod.
The language constructs are explained using an extended BNF, in
which ``(a)*`` means 0 or more ``a``'s, ``a+`` means 1 or more ``a``'s, and
``(a)?`` means an optional *a*; an alternative spelling for optional parts is
``[a]``. The ``|`` symbol is used to mark alternatives
and has the lowest precedence. Parentheses may be used to group elements.
``(a)?`` means an optional *a*. Parentheses may be used to group elements.
The ``|``, ``/`` symbols are used to mark alternatives and have the lowest
precedence. ``/`` is the ordered choice that requires the parser to try the
alternatives in the given order. ``/`` is often used to ensure the grammar
is not ambiguous.
Non-terminals start with a lowercase letter, abstract terminal symbols are in
UPPERCASE. Verbatim terminal symbols (including keywords) are quoted
with ``'``. An example::
ifStmt ::= 'if' expr ':' stmts ('elif' expr ':' stmts)* ['else' stmts]
ifStmt = 'if' expr ':' stmts ('elif' expr ':' stmts)* ('else' stmts)?
The binary ``^*`` operator is used as a shorthand for 0 or more occurrences
separated by its second argument; likewise ``^+`` means 1 or more
occurrences: ``a ^+ b`` is short for ``a (b a)*``
and ``a ^* b`` is short for ``(a (b a)*)?``. Example::
arrayConstructor = '[' expr ^* ',' ']'
Other parts of Nimrod - like scoping rules or runtime semantics are only
described in an informal manner for now.
@@ -50,7 +61,7 @@ An `identifier`:idx: is a symbol declared as a name for a variable, type,
procedure, etc. The region of the program over which a declaration applies is
called the `scope`:idx: of the declaration. Scopes can be nested. The meaning
of an identifier is determined by the smallest enclosing scope in which the
identifier is declared.
identifier is declared unless overloading resolution rules suggest otherwise.
An expression specifies a computation that produces a value or location.
Expressions that produce locations are called `l-values`:idx:. An l-value
@@ -93,28 +104,31 @@ Nimrod's standard grammar describes an `indentation sensitive`:idx: language.
This means that all the control structures are recognized by indentation.
Indentation consists only of spaces; tabulators are not allowed.
The terminals ``IND`` (indentation), ``DED`` (dedentation) and ``SAD``
(same indentation) are generated by the scanner, denoting an indentation.
The indentation handling is implemented as follows: The lexer annotates the
following token with the preceding number of spaces; indentation is not
a separate token. This trick allows parsing of Nimrod with only 1 token of
lookahead.
These terminals are only generated for lines that are not empty.
The parser uses a stack of indentation levels: the stack consists of integers
counting the spaces. The indentation information is queried at strategic
places in the parser but ignored otherwise: The pseudo terminal ``IND{>}``
denotes an indentation that consists of more spaces than the entry at the top
of the stack; ``IND{=}`` an indentation that has the same number of spaces. ``DED``
is another pseudo terminal that describes the *action* of popping a value
from the stack; ``IND{>}`` in turn implies pushing a value onto the stack.
The parser and the scanner communicate over a stack which indentation terminal
should be generated: the stack consists of integers counting the spaces. The
stack is initialized with a zero on its top. The scanner reads from the stack:
If the current indentation token consists of more spaces than the entry at the
top of the stack, a ``IND`` token is generated, else if it consists of the same
number of spaces, a ``SAD`` token is generated. If it consists of fewer spaces,
a ``DED`` token is generated for any item on the stack that is greater than the
current. These items are later popped from the stack by the parser. At the end
of the file, a ``DED`` token is generated for each number remaining on the
stack that is larger than zero.
With this notation we can now easily define the core of the grammar: A block of
statements (simplified example)::
ifStmt = 'if' expr ':' stmt
(IND{=} 'elif' expr ':' stmt)*
(IND{=} 'else' ':' stmt)?
simpleStmt = ifStmt / ...
stmt = IND{>} stmt ^+ IND{=} DED # list of statements
/ simpleStmt # or a simple statement
Because the grammar contains some optional ``IND`` tokens, the scanner cannot
push new indentation levels. This has to be done by the parser. The symbol
``indPush`` indicates that an ``IND`` token is expected; the current number of
leading spaces is pushed onto the stack by the parser. The symbol ``indPop``
denotes that the parser pops an item from the indentation stack. No token is
consumed by ``indPop``.
Comments
@@ -416,11 +430,11 @@ and not the two tokens `{.`:tok:, `.}`:tok:.
Syntax
======
This section lists Nimrod's standard syntax in ENBF. How the parser receives
indentation tokens is already described in the `Lexical Analysis`_ section.
This section lists Nimrod's standard syntax. How the parser handles
the indentation is already described in the `Lexical Analysis`_ section.
Nimrod allows user-definable operators.
Binary operators have 10 different levels of precedence.
Binary operators have 10 different levels of precedence.
Relevant character
------------------
@@ -1040,7 +1054,7 @@ an ``object`` type or a ``ref object`` type:
.. code-block:: nimrod
var student = TStudent(name: "Anton", age: 5, id: 3)
For a ``ref object`` type ``new`` is invoked implicitly.
For a ``ref object`` type ``system.new`` is invoked implicitly.
Object variants
@@ -1701,44 +1715,20 @@ Statements and expressions
==========================
Nimrod uses the common statement/expression paradigm: `Statements`:idx: do not
produce a value in contrast to expressions. Call expressions are statements.
If the called procedure returns a value, it is not a valid statement
as statements do not produce values. To evaluate an expression for
side-effects and throw its value away, one can use the ``discard`` statement.
produce a value in contrast to expressions. However, some expressions are
statements.
Statements are separated into `simple statements`:idx: and
`complex statements`:idx:.
Simple statements are statements that cannot contain other statements like
assignments, calls or the ``return`` statement; complex statements can
contain other statements. To avoid the `dangling else problem`:idx:, complex
statements always have to be intended::
simpleStmt ::= returnStmt
| yieldStmt
| discardStmt
| raiseStmt
| breakStmt
| continueStmt
| pragma
| importStmt
| fromStmt
| includeStmt
| exprStmt
complexStmt ::= ifStmt | whileStmt | caseStmt | tryStmt | forStmt
| blockStmt | asmStmt
| procDecl | iteratorDecl | macroDecl | templateDecl
| constSection | letSection
| typeSection | whenStmt | varSection
statements always have to be indented. The details can be found in the grammar.
Discard statement
-----------------
Syntax::
discardStmt ::= 'discard' expr
Example:
.. code-block:: nimrod
@@ -1766,16 +1756,6 @@ been declared with the `discardable`:idx: pragma:
Var statement
-------------
Syntax::
colonOrEquals ::= ':' typeDesc ['=' expr] | '=' expr
varField ::= symbol ['*'] [pragma]
varPart ::= symbol (comma symbol)* [comma] colonOrEquals [COMMENT | IND COMMENT]
varSection ::= 'var' (varPart
| indPush (COMMENT|varPart)
(SAD (COMMENT|varPart))* DED indPop)
`Var`:idx: statements declare new local and global variables and
initialize them. A comma separated list of variables can be used to specify
variables of the same type:
@@ -1839,14 +1819,6 @@ For let variables the same pragmas are available as for ordinary variables.
Const section
-------------
Syntax::
colonAndEquals ::= [':' typeDesc] '=' expr
constDecl ::= symbol ['*'] [pragma] colonAndEquals [COMMENT | IND COMMENT]
| COMMENT
constSection ::= 'const' indPush constDecl (SAD constDecl)* DED indPop
`Constants`:idx: are symbols which are bound to a value. The constant's value
cannot change. The compiler must be able to evaluate the expression in a
constant declaration at compile time.
@@ -1877,10 +1849,6 @@ they contain such a type.
Static statement/expression
---------------------------
Syntax::
staticExpr ::= 'static' '(' optInd expr optPar ')'
staticStmt ::= 'static' ':' stmt
A `static`:idx: statement/expression can be used to enforce compile
time evaluation explicitly. Enforced compile time evaluation can even evaluate
code that has side effects:
@@ -1902,10 +1870,6 @@ support the FFI at compile time.
If statement
------------
Syntax::
ifStmt ::= 'if' expr ':' stmt ('elif' expr ':' stmt)* ['else' ':' stmt]
Example:
.. code-block:: nimrod
@@ -1932,12 +1896,6 @@ part, execution continues with the statement after the ``if`` statement.
Case statement
--------------
Syntax::
caseStmt ::= 'case' expr [':'] ('of' sliceExprList ':' stmt)*
('elif' expr ':' stmt)*
['else' ':' stmt]
Example:
.. code-block:: nimrod
@@ -1998,10 +1956,6 @@ a list of its elements:
When statement
--------------
Syntax::
whenStmt ::= 'when' expr ':' stmt ('elif' expr ':' stmt)* ['else' ':' stmt]
Example:
.. code-block:: nimrod
@@ -2032,10 +1986,6 @@ within ``object`` definitions.
Return statement
----------------
Syntax::
returnStmt ::= 'return' [expr]
Example:
.. code-block:: nimrod
@@ -2063,10 +2013,6 @@ variables, ``result`` is initialized to (binary) zero:
Yield statement
---------------
Syntax::
yieldStmt ::= 'yield' expr
Example:
.. code-block:: nimrod
@@ -2083,10 +2029,6 @@ for further information.
Block statement
---------------
Syntax::
blockStmt ::= 'block' [symbol] ':' stmt
Example:
.. code-block:: nimrod
@@ -2108,10 +2050,6 @@ block to specify which block is to leave.
Break statement
---------------
Syntax::
breakStmt ::= 'break' [symbol]
Example:
.. code-block:: nimrod
@@ -2125,10 +2063,6 @@ absent, the innermost block is left.
While statement
---------------
Syntax::
whileStmt ::= 'while' expr ':' stmt
Example:
.. code-block:: nimrod
@@ -2147,10 +2081,6 @@ so that they can be left with a ``break`` statement.
Continue statement
------------------
Syntax::
continueStmt ::= 'continue'
A `continue`:idx: statement leads to the immediate next iteration of the
surrounding loop construct. It is only allowed within a loop. A continue
statement is syntactic sugar for a nested block:
@@ -2173,9 +2103,6 @@ Is equivalent to:
Assembler statement
-------------------
Syntax::
asmStmt ::= 'asm' [pragma] (STR_LIT | RSTR_LIT | TRIPLESTR_LIT)
The direct embedding of `assembler`:idx: code into Nimrod code is supported
by the unsafe ``asm`` statement. Identifiers in the assembler code that refer to
@@ -2203,8 +2130,7 @@ Example:
var y = if x > 8: 9 else: 10
An if expression always results in a value, so the ``else`` part is
required. ``Elif`` parts are also allowed (but unlikely to be good
style).
required. ``Elif`` parts are also allowed.
When expression
---------------
@@ -2311,18 +2237,8 @@ procedure declaration defines an identifier and associates it with a block
of code.
A procedure may call itself recursively. A parameter may be given a default
value that is used if the caller does not provide a value for this parameter.
The syntax is::
param ::= symbol (comma symbol)* (':' typeDesc ['=' expr] | '=' expr)
paramList ::= ['(' [param (comma param)*] [SAD] ')'] [':' typeDesc]
genericParam ::= symbol [':' typeDesc] ['=' expr]
genericParams ::= '[' genericParam (comma genericParam)* [SAD] ']'
procDecl ::= 'proc' symbol ['*'] [genericParams] paramList [pragma]
['=' stmt]
If the ``= stmt`` part is missing, it is a `forward`:idx: declaration. If
If the proc declaration has no body, it is a `forward`:idx: declaration. If
the proc returns a value, the procedure body can access an implicitly declared
variable named `result`:idx: that represents the return value. Procs can be
overloaded. The overloading resolution algorithm tries to find the proc that is
@@ -2417,24 +2333,14 @@ Do notation
As a special more convenient notation, proc expressions involved in procedure
calls can use the ``do`` keyword:
Syntax::
primarySuffix ::= 'do' ['(' namedExprList ')'] ['->' typeDesc] ':'
As a start, let's repeat the example from the previous section:
.. code-block:: nimrod
cities.sort do (x,y: string) -> int:
cmp(x.len, y.len)
``do`` is written after the parentheses enclosing the regular proc params.
The proc expression represented by the do block is appended to them.
Again, let's see the equivalent of the previous example:
.. code-block:: nimrod
sort(cities) do (x,y: string) -> int:
cmp(x.len, y.len)
Finally, more than one ``do`` block can appear in a single call:
``do`` is written after the parentheses enclosing the regular proc params.
The proc expression represented by the do block is appended to them.
More than one ``do`` block can appear in a single call:
.. code-block:: nimrod
proc performWithUndo(task: proc(), undo: proc()) = ...
@@ -2635,30 +2541,16 @@ evaluation or dead code elimination do not work with methods.
Iterators and the for statement
===============================
Syntax::
forStmt ::= 'for' symbol (comma symbol)* [comma] 'in' expr ':' stmt
param ::= symbol (comma symbol)* [comma] ':' typeDesc
paramList ::= ['(' [param (comma param)* [comma]] ')'] [':' typeDesc]
genericParam ::= symbol [':' typeDesc]
genericParams ::= '[' genericParam (comma genericParam)* [comma] ']'
iteratorDecl ::= 'iterator' symbol ['*'] [genericParams] paramList [pragma]
['=' stmt]
The `for`:idx: statement is an abstract mechanism to iterate over the elements
of a container. It relies on an `iterator`:idx: to do so. Like ``while``
statements, ``for`` statements open an `implicit block`:idx:, so that they
can be left with a ``break`` statement.
The ``for`` loop declares
iteration variables (``x`` in the example) - their scope reaches until the
The ``for`` loop declares iteration variables - their scope reaches until the
end of the loop body. The iteration variables' types are inferred by the
return type of the iterator.
An iterator is similar to a procedure, except that it is always called in the
An iterator is similar to a procedure, except that it can be called in the
context of a ``for`` loop. Iterators provide a way to specify the iteration over
an abstract type. The ``yield`` statement in the called iterator plays a key
role in the execution of a ``for`` loop. Whenever a ``yield`` statement is
@@ -2686,9 +2578,10 @@ The compiler generates code as if the programmer would have written this:
echo(ch)
inc(i)
If the iterator yields a tuple, there have to be as many iteration variables
If the iterator yields a tuple, there can be as many iteration variables
as there are components in the tuple. The i'th iteration variable's type is
the type of the i'th component.
the type of the i'th component. In other words, implicit tuple unpacking in a
for loop context is supported.
Implicit items/pairs invocations
@@ -2792,23 +2685,10 @@ iterator that has already finished its work.
Type sections
=============
Syntax::
typeDef ::= typeDesc | objectDef | enumDef
genericParam ::= symbol [':' typeDesc]
genericParams ::= '[' genericParam (comma genericParam)* [comma] ']'
typeDecl ::= COMMENT
| symbol ['*'] [genericParams] ['=' typeDef] [COMMENT|IND COMMENT]
typeSection ::= 'type' indPush typeDecl (SAD typeDecl)* DED indPop
Example:
.. code-block:: nimrod
type # example demonstrates mutually recursive types
type # example demonstrating mutually recursive types
PNode = ref TNode # a traced pointer to a TNode
TNode = object
le, ri: PNode # left and right subtrees
@@ -2822,7 +2702,8 @@ Example:
A `type`:idx: section begins with the ``type`` keyword. It contains multiple
type definitions. A type definition binds a type to a name. Type definitions
can be recursive or even mutually recursive. Mutually recursive types are only
possible within a single ``type`` section.
possible within a single ``type`` section. Nominal types like ``objects``
or ``enums`` can only be defined in a ``type`` section.
Exception handling
@@ -2831,14 +2712,6 @@ Exception handling
Try statement
-------------
Syntax::
qualifiedIdent ::= symbol ['.' symbol]
exceptList ::= [qualifiedIdent (comma qualifiedIdent)* [comma]]
tryStmt ::= 'try' ':' stmt
('except' exceptList ':' stmt)*
['finally' ':' stmt]
Example:
.. code-block:: nimrod
@@ -2863,15 +2736,14 @@ Example:
close(f)
The statements after the `try`:idx: are executed in sequential order unless
an exception ``e`` is raised. If the exception type of ``e`` matches any
of the list ``exceptlist`` the corresponding statements are executed.
listed in an ``except`` clause the corresponding statements are executed.
The statements following the ``except`` clauses are called
`exception handlers`:idx:.
The empty `except`:idx: clause is executed if there is an exception that is
in no list. It is similar to an ``else`` clause in ``if`` statements.
not listed otherwise. It is similar to an ``else`` clause in ``if`` statements.
If there is a `finally`:idx: clause, it is always executed after the
exception handlers.
@@ -2916,10 +2788,6 @@ statements. Example:
Raise statement
---------------
Syntax::
raiseStmt ::= 'raise' [expr]
Example:
.. code-block:: nimrod
@@ -2948,17 +2816,21 @@ This allows for a Lisp-like `condition system`:idx:\:
.. code-block:: nimrod
var myFile = open("broken.txt", fmWrite)
try:
onRaise(proc (e: ref E_Base): bool =
onRaise do (e: ref E_Base)-> bool:
if e of EIO:
stdout.writeln "ok, writing to stdout instead"
else:
# do raise other exceptions:
result = true
)
myFile.writeln "writing to broken file"
finally:
myFile.close()
``OnRaise`` can only *filter* raised exceptions, it cannot transform one
exception into another. (Nor should ``onRaise`` raise an exception though
this is currently not enforced.) This restriction keeps the exception tracking
analysis sound.
Effect system
=============
@@ -3447,10 +3319,6 @@ Symbol binding within templates happens after template instantiation:
Bind statement
--------------
Syntax::
bindStmt ::= 'bind' IDENT (comma IDENT)*
Exporting a template is often a leaky abstraction as it can depend on
symbols that are not visible from a client module. However, to compensate for
this case, a `bind`:idx: statement can be used: It declares all identifiers
@@ -3715,18 +3583,11 @@ Statement Macros
----------------
Statement macros are defined just as expression macros. However, they are
invoked by an expression following a colon::
exprStmt ::= lowestExpr ['=' expr | [expr (comma expr)* [comma]] [macroStmt]]
macroStmt ::= ':' [stmt] ('of' [sliceExprList] ':' stmt
| 'elif' expr ':' stmt
| 'except' exceptList ':' stmt )*
['else' ':' stmt]
invoked by an expression following a colon.
The following example outlines a macro that generates a lexical analyzer from
regular expressions:
.. code-block:: nimrod
import macros
@@ -3799,7 +3660,7 @@ instantiation type using the param name:
var tree = new(TBinaryTree[int])
When used with macros and .compileTime. procs on the other hand, the compiler
don't need to instantiate the code multiple times, because types then can be
does not need to instantiate the code multiple times, because types then can be
manipulated using the unified internal symbol representation. In such a context
typedesc acts as any other type. One can create variables, store typedesc
values inside containers and so on. For example, here is how one can create
@@ -4358,13 +4219,6 @@ the compiler encounters any static error.
Pragmas
=======
Syntax::
colonExpr ::= expr [':' expr]
colonExprList ::= [colonExpr (comma colonExpr)* [comma]]
pragma ::= '{.' optInd (colonExpr [comma])* [SAD] ('.}' | '}')
Pragmas are Nimrod's method to give the compiler additional information /
commands without introducing a massive number of new keywords. Pragmas are
processed on the fly during semantic checking. Pragmas are enclosed in the
@@ -4411,10 +4265,10 @@ calls to any base class destructors in both user-defined and generated
destructors.
A destructor is attached to the type it destructs; expressions of this type
can then only be used in *destructible contexts*:
can then only be used in *destructible contexts* and as parameters:
.. code-block:: nimrod
type
type
TMyObj = object
x, y: int
p: pointer
@@ -4425,9 +4279,15 @@ can then only be used in *destructible contexts*:
proc open: TMyObj =
result = TMyObj(x: 1, y: 2, p: alloc(3))
proc work(o: TMyObj) =
echo o.x
# No destructor invoked here for 'o' as 'o' is a parameter.
proc main() =
# destructor automatically invoked at the end of the scope:
var x = open()
# valid: pass 'x' to some other proc:
work(x)
# Error: usage of a type with a destructor in a non destructible context
echo open()
@@ -4849,8 +4709,8 @@ a dynamic library (``.dll`` files for Windows, ``lib*.so`` files for UNIX).
The non-optional argument has to be the name of the dynamic library:
.. code-block:: Nimrod
proc gtk_image_new(): PGtkWidget {.
cdecl, dynlib: "libgtk-x11-2.0.so", importc.}
proc gtk_image_new(): PGtkWidget
{.cdecl, dynlib: "libgtk-x11-2.0.so", importc.}
In general, importing a dynamic library does not require any special linker
options or linking with import libraries. This also implies that no *devel*
@@ -4894,6 +4754,10 @@ strings, because they are precompiled.
**Note**: Passing variables to the ``dynlib`` pragma will fail at runtime
because of order of initialization problems.
**Note**: A ``dynlib`` import can be overridden with
the ``--dynlibOverride:name`` command line option. The Compiler User Guide
contains further information.
Dynlib pragma for export
------------------------
@@ -4971,7 +4835,7 @@ Nimrod supports the `actor model`:idx: of concurrency natively:
type
TMsgKind = enum
mLine, mEof
TMsg = object {.pure, final.}
TMsg = object
case k: TMsgKind
of mEof: nil
of mLine: data: string

View File

@@ -10,10 +10,8 @@ version 0.9.2
- parser/grammar: enforce 'simpleExpr' more often --> doesn't work; tkProc is
part of primary!
* check that of branches can only receive even simpler expressions, don't
allow of (var x = 23; nkIdent)
* document the new grammar: ^+ ^* operators; indentation handling
* remove rules in the manual as it's too hard to keep it up to date
* improve rules to contain the AST structure
allow 'of (var x = 23; nkIdent)'
* bugfix: 'import x var y = 0' compiles
* the typeDesc/expr unification is weird and only necessary because of
the ambiguous a[T] construct: It would be easy to support a[expr] for
generics but require a[.typeDesc] if that's required; this would also
@@ -48,7 +46,7 @@ version 0.9.4
- implement full 'not nil' checking
- make 'bind' default for templates and introduce 'mixin';
special rule for ``[]=``
- implicit deref for parameter matching; overloading based on 'var T'
- implicit deref for parameter matching
- ``=`` should be overloadable; requires specialization for ``=``; general
lift mechanism in the compiler is already implemented for 'fields'
- lazy overloading resolution:
@@ -66,9 +64,7 @@ version 0.9.X
- improve the compiler as a service
- better support for macros that rewrite procs
- macros need access to types and symbols (partially implemented)
- rethink the syntax/grammar:
* parser is not strict enough with newlines
* change comment handling in the AST
- perhaps: change comment handling in the AST
Concurrency
@@ -108,7 +104,8 @@ Not essential for 1.0.0
- mocking support with ``tyProxy`` that does: fallback for ``.`` operator
- overloading of ``.``? Special case ``.=``?
- allow implicit forward declarations of procs via a pragma (so that the
wrappers can deactivate it)
wrappers can deactivate it): better solution: introduce the notion of a
'proc section' that is similar to a type section.
- implement the "snoopResult" pragma; no, make a strutils with string append
semantics instead ...
- implement "closure tuple consists of a single 'ref'" optimization