Document parser structures and procedures.

2026-07-17 22:41:14 +00:00 · 2014-05-24 23:28:13 -04:00
parent b3a39524ee
commit 3afbe24b7c
1 changed files with 39 additions and 20 deletions
--- a/compiler/parser.nim
+++ b/compiler/parser.nim
@@ -28,25 +28,20 @@ import
  llstream, lexer, idents, strutils, ast, astalgo, msgs

 type
-  TParser*{.final.} = object  # a TParser object represents a module that
+  TParser*{.final.} = object  # A TParser object represents a module that
                              # is being parsed
-    currInd: int              # current indentation
-    firstTok, strongSpaces: bool
-    lex*: TLexer              # the lexer that is used for parsing
-    tok*: TToken              # the current token
-    inPragma: int
+    currInd: int              # current indentation level
+    firstTok, strongSpaces: bool # Has the first token been read?
+                                 # Is strongSpaces on?
+    lex*: TLexer              # The lexer that is used for parsing
+    tok*: TToken              # The current token
+    inPragma: int             # Pragma level
    inSemiStmtList: int

 proc parseAll*(p: var TParser): PNode
 proc closeParser*(p: var TParser)
 proc parseTopLevelStmt*(p: var TParser): PNode
-  # implements an iterator. Returns the next top-level statement or
-  # emtyNode if end of stream.
-
 proc parseString*(s: string, filename: string = "", line: int = 0): PNode
-  # filename and line could be set optionally, when the string originates 
-  # from a certain source file. This way, the compiler could generate
-  # correct error messages referring to the original source.
  
 # helpers for the other parsers
 proc isOperator*(tok: TToken): bool
@@ -72,11 +67,15 @@ proc parseTry(p: var TParser): PNode
 proc parseCase(p: var TParser): PNode
 # implementation

-proc getTok(p: var TParser) = 
+proc getTok(p: var TParser) =
+  ## Get the next token from the parser's lexer, and store it in the parser's
+  ## `tok` member.
  rawGetTok(p.lex, p.tok)

 proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream,
                 strongSpaces=false) =
+  ## Open a parser, using the given arguments to set up its internal state.
+  ## 
  initToken(p.tok)
  openLexer(p.lex, fileIdx, inputStream)
  getTok(p)                   # read the first token
@@ -87,13 +86,16 @@ proc openParser*(p: var TParser, filename: string, inputStream: PLLStream,
                 strongSpaces=false) =
  openParser(p, filename.fileInfoIdx, inputstream, strongSpaces)

-proc closeParser(p: var TParser) = 
+proc closeParser(p: var TParser) =
+  ## Close a parser, freeing up its resources.
  closeLexer(p.lex)

-proc parMessage(p: TParser, msg: TMsgKind, arg: string = "") = 
+proc parMessage(p: TParser, msg: TMsgKind, arg = "") =
+  ## Produce and emit the parser message `arg` to output.
  lexMessage(p.lex, msg, arg)

-proc parMessage(p: TParser, msg: TMsgKind, tok: TToken) = 
+proc parMessage(p: TParser, msg: TMsgKind, tok: TToken) =
+  ## Produce and emit a parser message to output about the token `tok`
  lexMessage(p.lex, msg, prettyTok(tok))

 template withInd(p: expr, body: stmt) {.immediate.} =
@@ -143,10 +145,13 @@ proc expectIdent(p: TParser) =
    lexMessage(p.lex, errIdentifierExpected, prettyTok(p.tok))
  
 proc eat(p: var TParser, tokType: TTokType) =
+  ## Move the parser to the next token if the current token is of type
+  ## `tokType`, otherwise error.
  if p.tok.tokType == tokType: getTok(p)
  else: lexMessage(p.lex, errTokenExpected, TokTypeToStr[tokType])
  
 proc parLineInfo(p: TParser): TLineInfo =
+  ## Retrieve the line information associated with the parser's current state.
  result = getLineInfo(p.lex, p.tok)

 proc indAndComment(p: var TParser, n: PNode) =
@@ -192,9 +197,11 @@ proc isSigilLike(tok: TToken): bool {.inline.} =
  result = tok.tokType == tkOpr and relevantOprChar(tok.ident) == '@'

 proc isLeftAssociative(tok: TToken): bool {.inline.} =
+  ## Determines whether the token is left assocative.
  result = tok.tokType != tkOpr or relevantOprChar(tok.ident) != '^'

 proc getPrecedence(tok: TToken, strongSpaces: bool): int =
+  ## Calculates the precedence of the given token.
  template considerStrongSpaces(x): expr =
    x + (if strongSpaces: 100 - tok.strongSpaceA.int*10 else: 0)

@@ -224,22 +231,26 @@ proc getPrecedence(tok: TToken, strongSpaces: bool): int =
  else: result = -10

 proc isOperator(tok: TToken): bool =
+  ## Determines if the given token is an operator type token.
  tok.tokType in {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs,
                  tkIsnot, tkNot, tkOf, tkAs, tkDotDot, tkAnd, tkOr, tkXor}

 proc isUnary(p: TParser): bool =
+  ## Check if the current parser token is a unary operator
  p.strongSpaces and p.tok.tokType in {tkOpr, tkDotDot} and
    p.tok.strongSpaceB == 0 and
    p.tok.strongSpaceA > 0

 proc checkBinary(p: TParser) {.inline.} =
+  ## Check if the current parser token is a binary operator.
  # we don't check '..' here as that's too annoying
  if p.strongSpaces and p.tok.tokType == tkOpr:
    if p.tok.strongSpaceB > 0 and p.tok.strongSpaceA != p.tok.strongSpaceB:
-      parMessage(p, errGenerated, "number of spaces around '$#' not consistent"%
-        prettyTok(p.tok))
+      parMessage(p, errGenerated,
+                 "Number of spaces around '$#' not consistent" %
+                 prettyTok(p.tok))
    elif p.tok.strongSpaceA notin {0,1,2,4,8}:
-      parMessage(p, errGenerated, "number of spaces must be 0,1,2,4 or 8")
+      parMessage(p, errGenerated, "Number of spaces must be 0,1,2,4 or 8")

 #| module = stmt ^* (';' / IND{=})
 #|
@@ -1108,6 +1119,7 @@ proc parseTypeDefAux(p: var TParser): PNode =
  result = simpleExpr(p, pmTypeDef)

 proc makeCall(n: PNode): PNode =
+  ## Creates a call if the given node isn't already a call.
  if n.kind in nkCallKinds:
    result = n
  else:
@@ -1952,7 +1964,8 @@ proc parseStmt(p: var TParser): PNode =
          if p.tok.tokType != tkSemiColon: break
          getTok(p)
  
-proc parseAll(p: var TParser): PNode = 
+proc parseAll(p: var TParser): PNode =
+  ## Parses the rest of the input stream held by the parser into a PNode.
  result = newNodeP(nkStmtList, p)
  while p.tok.tokType != tkEof: 
    var a = complexOrSimpleStmt(p)
@@ -1966,6 +1979,8 @@ proc parseAll(p: var TParser): PNode =
      parMessage(p, errInvalidIndentation)

 proc parseTopLevelStmt(p: var TParser): PNode =
+  ## Implements an iterator which, when called repeatedly, returns the next
+  ## top-level statement or emptyNode if end of stream.
  result = ast.emptyNode
  while true:
    if p.tok.indent != 0: 
@@ -1984,6 +1999,10 @@ proc parseTopLevelStmt(p: var TParser): PNode =
      break

 proc parseString(s: string, filename: string = "", line: int = 0): PNode =
+  ## Parses a string into an AST, returning the top node.
+  ## `filename` and `line`, although optional, provide info so that the
+  ## compiler can generate correct error messages referring to the original
+  ## source.
  var stream = llStreamOpen(s)
  stream.lineOffset = line