Document parser structures and procedures.

This commit is contained in:
Clay Sweetser
2014-05-24 23:28:13 -04:00
parent b3a39524ee
commit 3afbe24b7c

View File

@@ -28,25 +28,20 @@ import
llstream, lexer, idents, strutils, ast, astalgo, msgs
type
TParser*{.final.} = object # a TParser object represents a module that
TParser*{.final.} = object # A TParser object represents a module that
# is being parsed
currInd: int # current indentation
firstTok, strongSpaces: bool
lex*: TLexer # the lexer that is used for parsing
tok*: TToken # the current token
inPragma: int
currInd: int # current indentation level
firstTok, strongSpaces: bool # Has the first token been read?
# Is strongSpaces on?
lex*: TLexer # The lexer that is used for parsing
tok*: TToken # The current token
inPragma: int # Pragma level
inSemiStmtList: int
proc parseAll*(p: var TParser): PNode
proc closeParser*(p: var TParser)
proc parseTopLevelStmt*(p: var TParser): PNode
# implements an iterator. Returns the next top-level statement or
# emtyNode if end of stream.
proc parseString*(s: string, filename: string = "", line: int = 0): PNode
# filename and line could be set optionally, when the string originates
# from a certain source file. This way, the compiler could generate
# correct error messages referring to the original source.
# helpers for the other parsers
proc isOperator*(tok: TToken): bool
@@ -72,11 +67,15 @@ proc parseTry(p: var TParser): PNode
proc parseCase(p: var TParser): PNode
# implementation
proc getTok(p: var TParser) =
proc getTok(p: var TParser) =
## Get the next token from the parser's lexer, and store it in the parser's
## `tok` member.
rawGetTok(p.lex, p.tok)
proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream,
strongSpaces=false) =
## Open a parser, using the given arguments to set up its internal state.
##
initToken(p.tok)
openLexer(p.lex, fileIdx, inputStream)
getTok(p) # read the first token
@@ -87,13 +86,16 @@ proc openParser*(p: var TParser, filename: string, inputStream: PLLStream,
strongSpaces=false) =
openParser(p, filename.fileInfoIdx, inputstream, strongSpaces)
proc closeParser(p: var TParser) =
proc closeParser(p: var TParser) =
## Close a parser, freeing up its resources.
closeLexer(p.lex)
proc parMessage(p: TParser, msg: TMsgKind, arg: string = "") =
proc parMessage(p: TParser, msg: TMsgKind, arg = "") =
## Produce and emit the parser message `arg` to output.
lexMessage(p.lex, msg, arg)
proc parMessage(p: TParser, msg: TMsgKind, tok: TToken) =
proc parMessage(p: TParser, msg: TMsgKind, tok: TToken) =
## Produce and emit a parser message to output about the token `tok`
lexMessage(p.lex, msg, prettyTok(tok))
template withInd(p: expr, body: stmt) {.immediate.} =
@@ -143,10 +145,13 @@ proc expectIdent(p: TParser) =
lexMessage(p.lex, errIdentifierExpected, prettyTok(p.tok))
proc eat(p: var TParser, tokType: TTokType) =
## Move the parser to the next token if the current token is of type
## `tokType`, otherwise error.
if p.tok.tokType == tokType: getTok(p)
else: lexMessage(p.lex, errTokenExpected, TokTypeToStr[tokType])
proc parLineInfo(p: TParser): TLineInfo =
## Retrieve the line information associated with the parser's current state.
result = getLineInfo(p.lex, p.tok)
proc indAndComment(p: var TParser, n: PNode) =
@@ -192,9 +197,11 @@ proc isSigilLike(tok: TToken): bool {.inline.} =
result = tok.tokType == tkOpr and relevantOprChar(tok.ident) == '@'
proc isLeftAssociative(tok: TToken): bool {.inline.} =
## Determines whether the token is left assocative.
result = tok.tokType != tkOpr or relevantOprChar(tok.ident) != '^'
proc getPrecedence(tok: TToken, strongSpaces: bool): int =
## Calculates the precedence of the given token.
template considerStrongSpaces(x): expr =
x + (if strongSpaces: 100 - tok.strongSpaceA.int*10 else: 0)
@@ -224,22 +231,26 @@ proc getPrecedence(tok: TToken, strongSpaces: bool): int =
else: result = -10
proc isOperator(tok: TToken): bool =
## Determines if the given token is an operator type token.
tok.tokType in {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs,
tkIsnot, tkNot, tkOf, tkAs, tkDotDot, tkAnd, tkOr, tkXor}
proc isUnary(p: TParser): bool =
## Check if the current parser token is a unary operator
p.strongSpaces and p.tok.tokType in {tkOpr, tkDotDot} and
p.tok.strongSpaceB == 0 and
p.tok.strongSpaceA > 0
proc checkBinary(p: TParser) {.inline.} =
## Check if the current parser token is a binary operator.
# we don't check '..' here as that's too annoying
if p.strongSpaces and p.tok.tokType == tkOpr:
if p.tok.strongSpaceB > 0 and p.tok.strongSpaceA != p.tok.strongSpaceB:
parMessage(p, errGenerated, "number of spaces around '$#' not consistent"%
prettyTok(p.tok))
parMessage(p, errGenerated,
"Number of spaces around '$#' not consistent" %
prettyTok(p.tok))
elif p.tok.strongSpaceA notin {0,1,2,4,8}:
parMessage(p, errGenerated, "number of spaces must be 0,1,2,4 or 8")
parMessage(p, errGenerated, "Number of spaces must be 0,1,2,4 or 8")
#| module = stmt ^* (';' / IND{=})
#|
@@ -1108,6 +1119,7 @@ proc parseTypeDefAux(p: var TParser): PNode =
result = simpleExpr(p, pmTypeDef)
proc makeCall(n: PNode): PNode =
## Creates a call if the given node isn't already a call.
if n.kind in nkCallKinds:
result = n
else:
@@ -1952,7 +1964,8 @@ proc parseStmt(p: var TParser): PNode =
if p.tok.tokType != tkSemiColon: break
getTok(p)
proc parseAll(p: var TParser): PNode =
proc parseAll(p: var TParser): PNode =
## Parses the rest of the input stream held by the parser into a PNode.
result = newNodeP(nkStmtList, p)
while p.tok.tokType != tkEof:
var a = complexOrSimpleStmt(p)
@@ -1966,6 +1979,8 @@ proc parseAll(p: var TParser): PNode =
parMessage(p, errInvalidIndentation)
proc parseTopLevelStmt(p: var TParser): PNode =
## Implements an iterator which, when called repeatedly, returns the next
## top-level statement or emptyNode if end of stream.
result = ast.emptyNode
while true:
if p.tok.indent != 0:
@@ -1984,6 +1999,10 @@ proc parseTopLevelStmt(p: var TParser): PNode =
break
proc parseString(s: string, filename: string = "", line: int = 0): PNode =
## Parses a string into an AST, returning the top node.
## `filename` and `line`, although optional, provide info so that the
## compiler can generate correct error messages referring to the original
## source.
var stream = llStreamOpen(s)
stream.lineOffset = line