mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-29 01:14:41 +00:00
follow up https://github.com/nim-lang/Nim/pull/22851 follow up https://github.com/nim-lang/Nim/pull/22873
3884 lines
140 KiB
Nim
3884 lines
140 KiB
Nim
#
|
|
#
|
|
# Nim's Runtime Library
|
|
# (c) Copyright 2012 Andreas Rumpf
|
|
#
|
|
# See the file "copying.txt", included in this
|
|
# distribution, for details about the copyright.
|
|
#
|
|
|
|
## This module implements a `reStructuredText`:idx: (RST) and
|
|
## `Markdown`:idx: parser.
|
|
## User's manual on supported markup syntax and command line usage can be
|
|
## found in [Nim-flavored Markdown and reStructuredText](markdown_rst.html).
|
|
##
|
|
## * See also [Nim DocGen Tools Guide](docgen.html) for handling of
|
|
## ``.nim`` files.
|
|
## * See also [packages/docutils/rstgen module](rstgen.html) to know how to
|
|
## generate HTML or Latex strings (for embedding them into custom documents).
|
|
##
|
|
## Choice between Markdown and RST as well as optional additional features are
|
|
## turned on by passing ``options:`` [RstParseOptions] to [proc rstParse].
|
|
|
|
import
|
|
std/[os, strutils, enumutils, algorithm, lists, sequtils,
|
|
tables, strscans]
|
|
import dochelpers, rstidx, rstast
|
|
import std/private/miscdollars
|
|
from highlite import SourceLanguage, getSourceLanguage
|
|
|
|
when defined(nimPreviewSlimSystem):
|
|
import std/[assertions, syncio]
|
|
|
|
|
|
type
  RstParseOption* = enum      ## options for the RST parser
    roSupportSmilies,         ## make the RST parser support smilies like ``:)``
    roSupportRawDirective,    ## support the ``raw`` directive (don't support
                              ## it for sandboxing)
    roSupportMarkdown,        ## support additional features of Markdown
    roPreferMarkdown,         ## parse as Markdown (keeping RST as "extension"
                              ## to Markdown) -- implies `roSupportMarkdown`
    roNimFile,                ## set for Nim files where default interpreted
                              ## text role should be :nim:
    roSandboxDisabled         ## this option enables certain options
                              ## (e.g. raw, include, importdoc)
                              ## which are disabled by default as they can
                              ## enable users to read arbitrary data and
                              ## perform XSS if the parser is used in a web
                              ## app.

  RstParseOptions* = set[RstParseOption]

  MsgClass* = enum            ## severity of a message; the string value is
                              ## the text printed in front of the message
    mcHint = "Hint",
    mcWarning = "Warning",
    mcError = "Error"

  # keep the order in sync with compiler/docgen.nim and compiler/lineinfos.nim:
  MsgKind* = enum             ## the possible messages; the 2nd letter of the
                              ## identifier (`e`/`w`/`h`) encodes the class,
                              ## the string value is a `%` format template
    meCannotOpenFile = "cannot open '$1'",
    meExpected = "'$1' expected",
    meMissingClosing = "$1",
    meGridTableNotImplemented = "grid table is not implemented",
    meMarkdownIllformedTable = "illformed delimiter row of a Markdown table",
    meIllformedTable = "Illformed table: $1",
    meNewSectionExpected = "new section expected $1",
    meGeneralParseError = "general parse error",
    meInvalidDirective = "invalid directive: '$1'",
    meInvalidField = "invalid field: $1",
    meFootnoteMismatch = "mismatch in number of footnotes and their refs: $1",
    mwRedefinitionOfLabel = "redefinition of label '$1'",
    mwUnknownSubstitution = "unknown substitution '$1'",
    mwAmbiguousLink = "ambiguous doc link $1",
    mwBrokenLink = "broken link '$1'",
    mwUnsupportedLanguage = "language '$1' not supported",
    mwUnsupportedField = "field '$1' not supported",
    mwRstStyle = "RST style: $1",
    mwUnusedImportdoc = "importdoc for '$1' is not used",
    meSandboxedDirective = "disabled directive: '$1'",

  MsgHandler* = proc (filename: string, line, col: int, msgKind: MsgKind,
                      arg: string) {.closure, gcsafe.}
    ## what to do in case of an error
  FindFileHandler* = proc (filename: string): string {.closure, gcsafe.}
  FindRefFileHandler* =
    proc (targetRelPath: string):
         tuple[targetPath: string, linkRelPath: string] {.closure, gcsafe.}
    ## returns where .html or .idx file should be found by its relative path;
    ## `linkRelPath` is a prefix to be added before a link anchor from such file
|
|
|
|
# Forward declarations: the bodies of these exported procs are given
# further down in this module.
proc rstnodeToRefname*(n: PRstNode): string
proc addNodes*(n: PRstNode): string
proc getFieldValue*(n: PRstNode, fieldname: string): string {.gcsafe.}
proc getArgument*(n: PRstNode): string
|
|
|
|
# ----------------------------- scanner part --------------------------------
|
|
|
|
const
  # characters that can make up a word token
  SymChars: set[char] = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'}
  # every key of `Smilies` starts with one of these characters
  SmileyStartChars: set[char] = {':', ';', '8'}
  # smiley text -> icon name
  Smilies = {
    ":D": "icon_e_biggrin",
    ":-D": "icon_e_biggrin",
    ":)": "icon_e_smile",
    ":-)": "icon_e_smile",
    ";)": "icon_e_wink",
    ";-)": "icon_e_wink",
    ":(": "icon_e_sad",
    ":-(": "icon_e_sad",
    ":o": "icon_e_surprised",
    ":-o": "icon_e_surprised",
    ":shock:": "icon_eek",
    ":?": "icon_e_confused",
    ":-?": "icon_e_confused",
    ":-/": "icon_e_confused",

    "8-)": "icon_cool",

    ":lol:": "icon_lol",
    ":x": "icon_mad",
    ":-x": "icon_mad",
    ":P": "icon_razz",
    ":-P": "icon_razz",
    ":oops:": "icon_redface",
    ":cry:": "icon_cry",
    ":evil:": "icon_evil",
    ":twisted:": "icon_twisted",
    ":roll:": "icon_rolleyes",
    ":!:": "icon_exclaim",

    ":?:": "icon_question",
    ":idea:": "icon_idea",
    ":arrow:": "icon_arrow",
    ":|": "icon_neutral",
    ":-|": "icon_neutral",
    ":mrgreen:": "icon_mrgreen",
    ":geek:": "icon_e_geek",
    ":ugeek:": "icon_e_ugeek"
  }
  # allowlist of directive names
  # NOTE(review): presumably the directives that stay enabled when
  # `roSandboxDisabled` is not set -- confirm at the use site.
  SandboxDirAllowlist = [
    "image", "code", "code-block", "admonition", "attention", "caution",
    "container", "contents", "danger", "default-role", "error", "figure",
    "hint", "important", "index", "note", "role", "tip", "title", "warning"]
|
|
|
|
type
  TokType = enum
    tkEof, tkIndent,
    tkWhite, tkWord,
    tkAdornment,              # used for chapter adornment, transitions and
                              # horizontal table borders
    tkPunct,                  # one or many punctuation characters
    tkOther
  Token = object              # a RST token
    kind*: TokType            # the type of the token
    ival*: int                # the indentation or parsed integer value
    symbol*: string           # the parsed symbol as string
    line*, col*: int          # line and column of the token

  TokenSeq = seq[Token]
  Lexer = object of RootObj
    buf*: cstring             # text being scanned, read up to its '\0'
    bufpos*: int              # index of the next unread character in `buf`
    line*, col*, baseIndent*: int
    adornmentLine*: bool      # set when the current line was recognized as
                              # an adornment line
    escapeNext*: bool         # set after a lone backslash: the next
                              # punctuation character is taken literally
|
|
|
|
proc getThing(L: var Lexer, tok: var Token, s: set[char]) =
  ## Reads one `tkWord` token: the character at the current buffer position
  ## is always consumed, followed by every subsequent character that is a
  ## member of `s`. Advances `L.bufpos` and `L.col` past the token.
  tok.kind = tkWord
  tok.line = L.line
  tok.col = L.col
  let first = L.bufpos
  var cur = first
  # the first character is taken unconditionally
  tok.symbol.add(L.buf[cur])
  inc cur
  while L.buf[cur] in s:
    tok.symbol.add(L.buf[cur])
    inc cur
  inc L.col, cur - first
  L.bufpos = cur
|
|
|
|
proc isCurrentLineAdornment(L: var Lexer): bool =
  ## Looks ahead (without consuming anything) from the current position to
  ## the end of the line and tells whether the rest of the line may be an
  ## adornment: for a line starting with `+` (grid table) only `-`, `=`, `+`
  ## may follow; otherwise only the starting character itself or blanks.
  let first = L.buf[L.bufpos]
  let allowed: set[char] =
    if first == '+': {'-', '=', '+'}          # grid table
    else: {first, ' ', '\t', '\v', '\f'}      # section adornment or table
                                              # horizontal border
  var i = L.bufpos + 1
  while L.buf[i] notin {'\c', '\l', '\0'}:
    if L.buf[i] notin allowed:
      return false
    inc i
  result = true
|
|
|
|
proc getPunctAdornment(L: var Lexer, tok: var Token) =
  ## Reads a punctuation token (`tkAdornment` when the lexer is on an
  ## adornment line, `tkPunct` otherwise). Three cases:
  ## * the previous token was an escaping backslash: exactly one character
  ##   is taken;
  ## * a backslash outside of an adornment line: it is taken alone and
  ##   `L.escapeNext` is raised;
  ## * anything else: the whole run of the same character is taken.
  tok.kind = if L.adornmentLine: tkAdornment else: tkPunct
  tok.line = L.line
  tok.col = L.col
  let start = L.bufpos
  var cur = start
  let ch = L.buf[cur]
  if L.escapeNext:
    tok.symbol.add(ch)
    inc cur
  elif ch == '\\' and not L.adornmentLine:
    tok.symbol.add '\\'
    inc cur
    L.escapeNext = true
  else:
    # run of identical characters (at least one)
    tok.symbol.add(ch)
    inc cur
    while L.buf[cur] == ch:
      tok.symbol.add(ch)
      inc cur
  inc L.col, cur - start
  L.bufpos = cur
  # nim extension: standalone \ can not be adornment
  if tok.symbol == "\\": tok.kind = tkPunct
|
|
|
|
proc getBracket(L: var Lexer, tok: var Token) =
  ## Reads exactly one character at the current position as a `tkPunct`
  ## token (brackets are never merged into runs).
  tok.symbol.add(L.buf[L.bufpos])
  tok.kind = tkPunct
  tok.col = L.col
  tok.line = L.line
  inc L.bufpos
  inc L.col
|
|
|
|
proc getIndentAux(L: var Lexer, start: int): int =
  ## Computes the indentation that follows the line break at `start`.
  ## Blank lines are looked through recursively, so the result is the
  ## indentation of the next non-blank line (0 if the end of the buffer is
  ## reached). A tab advances to the next multiple of 8.
  ## `L.bufpos` is left just after the whitespace of the *first* line
  ## following `start`, not after the blank lines that were looked through.
  var cur = start
  # skip the newline (but include it in the token!)
  case L.buf[cur]
  of '\r':
    inc cur, (if L.buf[cur + 1] == '\n': 2 else: 1)
  of '\n':
    inc cur
  else:
    discard
  while true:
    let ch = L.buf[cur]
    if ch in {' ', '\v', '\f'}:
      inc result
    elif ch == '\t':
      result = result - (result mod 8) + 8
    else:
      break                   # EndOfFile also leaves the loop
    inc cur
  case L.buf[cur]
  of '\0':
    result = 0
  of '\n', '\r':
    # look at the next line for proper indentation:
    result = getIndentAux(L, cur)
  else:
    discard
  L.bufpos = cur              # no need to set back buf
|
|
|
|
proc getIndent(L: var Lexer, tok: var Token) =
  ## Reads a line break as a `tkIndent` token. `tok.ival` is the upcoming
  ## indentation relative to `L.baseIndent` (never negative) and
  ## `tok.symbol` is a newline followed by that many spaces.
  # skip the newline (but include it in the token!)
  let rawIndent = getIndentAux(L, L.bufpos)
  inc L.line
  L.col = rawIndent
  tok.kind = tkIndent
  tok.line = L.line
  tok.col = 0
  tok.ival = max(rawIndent - L.baseIndent, 0)
  tok.symbol = "\n" & spaces(tok.ival)
|
|
|
|
proc rawGetTok(L: var Lexer, tok: var Token) =
  ## Reads the next token from `L` into `tok`, dispatching on the character
  ## at the current buffer position. `tok.col` is finally made relative to
  ## `L.baseIndent` (never negative).
  tok.symbol = ""
  tok.ival = 0
  if L.col == 0:
    L.adornmentLine = false
  let ch = L.buf[L.bufpos]
  case ch
  of '\0':
    # end of buffer: nothing is consumed
    tok.line = L.line
    tok.col = L.col
    tok.kind = tkEof
  of 'a'..'z', 'A'..'Z', '\x80'..'\xFF', '0'..'9':
    getThing(L, tok, SymChars)
  of ' ', '\t', '\v', '\f':
    getThing(L, tok, {' ', '\t'})
    tok.kind = tkWhite
    if L.buf[L.bufpos] in {'\r', '\n'}:
      rawGetTok(L, tok)       # ignore spaces before \n
  of '\r', '\n':
    getIndent(L, tok)
    L.adornmentLine = false
  of '!', '\"', '#', '$', '%', '&', '\'', '*', '+', ',', '-', '.',
     '/', ':', ';', '<', '=', '>', '?', '@', '\\', '^', '_', '`',
     '|', '~':
    # adornment detection happens only for a line's first character
    if L.col == 0:
      L.adornmentLine = L.isCurrentLineAdornment()
    getPunctAdornment(L, tok)
  of '(', ')', '[', ']', '{', '}':
    getBracket(L, tok)
  else:
    # any other character becomes a one-character tkOther token
    tok.line = L.line
    tok.col = L.col
    tok.kind = tkOther
    tok.symbol.add(ch)
    inc L.bufpos
    inc L.col
  tok.col = max(tok.col - L.baseIndent, 0)
|
|
|
|
proc getTokens(buffer: string, tokens: var TokenSeq) =
  ## Tokenizes `buffer`, appending the tokens to `tokens`; the last token
  ## appended is always `tkEof`. If `tokens` then starts with a whitespace
  ## token, that token is turned into a `tkIndent` whose `ival` is its
  ## length.
  var lx: Lexer
  lx.buf = cstring(buffer)
  lx.line = 0
  # skip UTF-8 BOM
  if lx.buf[0] == '\xEF' and lx.buf[1] == '\xBB' and lx.buf[2] == '\xBF':
    inc lx.bufpos, 3
  while true:
    tokens.setLen(tokens.len + 1)
    # an escape raised by the previous token lasts for exactly one token
    let escaping = lx.escapeNext
    rawGetTok(lx, tokens[^1])
    if escaping: lx.escapeNext = false
    if tokens[^1].kind == tkEof: break
  if tokens[0].kind == tkWhite:
    # BUGFIX
    tokens[0].ival = tokens[0].symbol.len
    tokens[0].kind = tkIndent
|
|
|
|
type
  LevelInfo = object
    symbol: char              # adornment character
    hasOverline: bool         # has also overline (besides underline)?
    line: int                 # the last line of this style occurrence
                              # (for error message)
    hasPeers: bool            # has headings on the same level of hierarchy?
  LiteralBlockKind = enum     # RST-style literal blocks after `::`
    lbNone,
    lbIndentedLiteralBlock,
    lbQuotedLiteralBlock
  LevelMap = seq[LevelInfo]   # Saves for each possible title adornment
                              # style its level in the current document.
  SubstitutionKind = enum
    rstSubstitution = "substitution",
    hyperlinkAlias = "hyperlink alias",
    implicitHyperlinkAlias = "implicitly-generated hyperlink alias"
  Substitution = object
    kind*: SubstitutionKind
    key*: string
    value*: PRstNode
    info*: TLineInfo          # place where the substitution was defined
  AnchorRule = enum
    arInternalRst,  ## For automatically generated RST anchors (from
                    ## headings, footnotes, inline internal targets):
                    ## case-insensitive, 1-space-significant (by RST spec)
    arExternalRst,  ## For external .nim doc comments or .rst/.md
    arNim,          ## For anchors generated by ``docgen.nim``: Nim-style case
                    ## sensitivity, etc. (see `proc normalizeNimName`_ for details)
    arHyperlink     ## For links with manually set anchors in
                    ## form `text <pagename.html#anchor>`_
  RstAnchorKind = enum
    manualDirectiveAnchor = "manual directive anchor",
    manualInlineAnchor = "manual inline anchor",
    footnoteAnchor = "footnote anchor",
    headlineAnchor = "implicitly-generated headline anchor"
  AnchorSubst = object
    info: TLineInfo           # the file where the anchor was defined
    priority: int
    case kind: range[arInternalRst .. arNim]
    of arInternalRst:
      anchorType: RstAnchorKind
      target: PRstNode
    of arExternalRst:
      anchorTypeExt: RstAnchorKind
      refnameExt: string
    of arNim:
      module: FileIndex       # anchor's module (generally not the same as file)
      tooltip: string         # displayed tooltip for Nim-generated anchors
      langSym: LangSymbol
      refname: string         # A reference name that will be inserted directly
                              # into HTML/Latex.
      external: bool
  AnchorSubstTable = Table[string, seq[AnchorSubst]]
    # use `seq` to account for duplicate anchors
  FootnoteType = enum
    fnManualNumber,           # manually numbered footnote like [3]
    fnAutoNumber,             # auto-numbered footnote [#]
    fnAutoNumberLabel,        # auto-numbered with label [#label]
    fnAutoSymbol,             # auto-symbol footnote [*]
    fnCitation                # simple text label like [citation2021]
  FootnoteSubst = tuple
    kind: FootnoteType        # discriminator
    number: int               # valid for fnManualNumber (always) and fnAutoNumber,
                              # fnAutoNumberLabel after resolveSubs is called
    autoNumIdx: int           # order of occurrence: fnAutoNumber, fnAutoNumberLabel
    autoSymIdx: int           # order of occurrence: fnAutoSymbol
    label: string             # valid for fnAutoNumberLabel
  RstFileTable* = object
    filenameToIdx*: Table[string, FileIndex]
    idxToFilename*: seq[string]
  ImportdocInfo = object
    used: bool                # was this import used?
    fromInfo: TLineInfo       # place of `.. importdoc::` directive
    idxPath: string           # full path to ``.idx`` file
    linkRelPath: string       # prefix before target anchor
    title: string             # document title obtained from ``.idx``
  RstSharedState = object
    options*: RstParseOptions # parsing options
    hLevels: LevelMap         # hierarchy of heading styles
    hTitleCnt: int            # =0 if no title, =1 if only main title,
                              # =2 if both title and subtitle are present
    hCurLevel: int            # current section level
    currRole: string          # current interpreted text role
    currRoleKind: RstNodeKind # ... and its node kind
    subs: seq[Substitution]   # substitutions
    refs*: seq[Substitution]  # references
    anchors*: AnchorSubstTable
                              # internal target substitutions
    lineFootnoteNum: seq[TLineInfo]     # footnote line, auto numbers .. [#]
    lineFootnoteNumRef: seq[TLineInfo]  # footnote line, their reference [#]_
    currFootnoteNumRef: int             # ... their counter for `resolveSubs`
    lineFootnoteSym: seq[TLineInfo]     # footnote line, auto symbols .. [*]
    lineFootnoteSymRef: seq[TLineInfo]  # footnote line, their reference [*]_
    currFootnoteSymRef: int             # ... their counter for `resolveSubs`
    footnotes: seq[FootnoteSubst] # correspondence b/w footnote label,
                                  # number, order of occurrence
    msgHandler: MsgHandler    # How to handle errors.
    findFile: FindFileHandler # How to find files for include.
    findRefFile: FindRefFileHandler
                              # How to find files imported by importdoc.
    filenames*: RstFileTable  # map file name <-> FileIndex (for storing
                              # file names for warnings after 1st stage)
    currFileIdx*: FileIndex   # current index in `filenames`
    tocPart*: seq[PRstNode]   # all the headings of a document
    hasToc*: bool
    idxImports*: Table[string, ImportdocInfo]
                              # map `importdoc`ed filename -> its info
    nimFileImported*: bool    # Was any ``.nim`` module `importdoc`ed ?

  PRstSharedState* = ref RstSharedState
  ManualAnchor = object
    alias: string             # a (short) name that can substitute the `anchor`
    anchor: string            # anchor = id = refname
    info: TLineInfo
  RstParser = object of RootObj
    idx*: int
    tok*: TokenSeq
    s*: PRstSharedState
    indentStack*: seq[int]
    line*, col*: int          ## initial line/column of whole text or
                              ## documentation fragment that will be added
                              ## in case of error/warning reporting to
                              ## (relative) line/column of the token.
    curAnchors*: seq[ManualAnchor]
                              ## seq to accumulate aliases for anchors:
                              ## because RST can have >1 alias per 1 anchor

  EParseError* = object of ValueError
  SectionParser = proc (p: var RstParser): PRstNode {.nimcall, gcsafe.}
|
|
|
|
const
  LineRstInit* = 1  ## Initial line number for standalone RST text
  ColRstInit* = 0   ## Initial column number for standalone RST text
                    ## (Nim global reporting adds ColOffset=1)
  ColRstOffset* = 1 ## 1: a replica of ColOffset for internal use
|
|
|
|
# Shortcuts for the tokens around the parser's current position `p.idx`
# (no bounds adjustment is done: plain indexing into `p.tok`).
template currentTok(p: RstParser): Token =
  p.tok[p.idx]
template prevTok(p: RstParser): Token =
  p.tok[p.idx - 1]
template nextTok(p: RstParser): Token =
  p.tok[p.idx + 1]
|
|
|
|
proc whichMsgClass*(k: MsgKind): MsgClass =
  ## returns which message class `k` belongs to.
  ##
  ## The class is encoded in the second letter of the enum identifier
  ## (`e` = error, `w` = warning, `h` = hint).
  let tag = k.symbolName[1]
  if tag in {'e', 'E'}:
    result = mcError
  elif tag in {'w', 'W'}:
    result = mcWarning
  elif tag in {'h', 'H'}:
    result = mcHint
  else:
    assert false, "msgkind does not fit naming scheme"
|
|
|
|
proc defaultMsgHandler*(filename: string, line, col: int, msgkind: MsgKind,
                        arg: string) =
  ## Default message handler: formats the message as
  ## `<location> <Class>: <text>`, where the text is the format string of
  ## `msgkind` filled with `arg`. Errors are raised as `EParseError`;
  ## warnings and hints are written to `stdout`.
  let severity = whichMsgClass(msgkind)
  let text = ($msgkind) % arg
  var report: string
  toLocation(report, filename, line, col + ColRstOffset)
  report.add " $1: $2" % [$severity, text]
  if severity != mcError:
    writeLine(stdout, report)
  else:
    raise newException(EParseError, report)
|
|
|
|
proc defaultFindFile*(filename: string): string =
  ## Default file finder: returns `filename` unchanged if such a file
  ## exists, otherwise an empty string.
  if fileExists(filename): filename else: ""
|
|
|
|
proc defaultFindRefFile*(filename: string): (string, string) =
  ## Default reference-file finder: the path is returned as is, paired
  ## with an empty second component.
  result = (filename, "")
|
|
|
|
proc defaultRole(options: RstParseOptions): string =
  ## Name of the default interpreted text role: `"nim"` when `roNimFile`
  ## is set, `"literal"` otherwise.
  result = "literal"
  if roNimFile in options:
    result = "nim"
|
|
|
|
proc whichRoleAux(sym: string): RstNodeKind =
  ## Maps a role name (compared case-insensitively, ASCII only) to the node
  ## kind it produces. Names that are not built-in roles but are known
  ## source languages give `rnInlineCode`; everything else is
  ## `rnUnknownRole`.
  let role = sym.toLowerAscii
  case role
  of "idx":
    result = rnIdx
  of "literal", "code":
    # literal and code are the same in our implementation
    result = rnInlineLiteral
  of "strong":
    result = rnStrongEmphasis
  of "emphasis":
    result = rnEmphasis
  of "sub", "subscript":
    result = rnSub
  of "sup", "superscript":
    result = rnSup
  of "program", "option", "tok":
    result = rnCodeFragment
  else:
    # c++ currently can be spelled only as cpp, c# only as csharp
    result =
      if getSourceLanguage(role) != langNone: rnInlineCode
      else: rnUnknownRole     # unknown role
|
|
|
|
proc len(filenames: RstFileTable): int =
  ## Number of file names registered in the table.
  result = len(filenames.idxToFilename)
|
|
|
|
proc addFilename*(s: PRstSharedState, file1: string): FileIndex =
  ## Returns index of filename, adding it if it has not been used before
  # a new entry would get the index equal to the current table size
  let fresh = FileIndex(s.filenames.len)
  result = s.filenames.filenameToIdx.getOrDefault(file1, fresh)
  if result == fresh:
    # not registered yet: record the name in both directions
    s.filenames.idxToFilename.add file1
    s.filenames.filenameToIdx[file1] = fresh
|
|
|
|
proc setCurrFilename*(s: PRstSharedState, file1: string) =
  ## Makes `file1` the current file of `s`, registering it in
  ## `s.filenames` first if necessary.
  let idx = s.addFilename(file1)
  s.currFileIdx = idx
|
|
|
|
proc getFilename(filenames: RstFileTable, fid: FileIndex): string =
  ## Returns the file name registered under index `fid`; an index outside
  ## of the table fails a `doAssert`.
  let i = fid.int
  doAssert(i >= 0 and i < filenames.len,
           "incorrect FileIndex $1 (range 0..$2)" % [
             $i, $(filenames.len - 1)])
  result = filenames.idxToFilename[i]
|
|
|
|
proc getFilename(s: PRstSharedState, subst: AnchorSubst): string =
  ## Name of the file recorded in the anchor's `info`.
  result = s.filenames.getFilename(subst.info.fileIndex)

proc getModule(s: PRstSharedState, subst: AnchorSubst): string =
  ## Name of the file registered under the anchor's `module` index
  ## (`subst` must be of kind `arNim`, the only branch with that field).
  s.filenames.getFilename(subst.module)

proc currFilename(s: PRstSharedState): string =
  ## Name of the file `s` currently points to.
  result = s.filenames.getFilename(s.currFileIdx)
|
|
|
|
proc newRstSharedState*(options: RstParseOptions,
                        filename: string,
                        findFile: FindFileHandler,
                        findRefFile: FindRefFileHandler,
                        msgHandler: MsgHandler,
                        hasToc: bool): PRstSharedState =
  ## Creates the state shared by all parsers of one document.
  ## Any handler passed as `nil` is replaced by the corresponding
  ## `default*` proc of this module; `filename` becomes the current file.
  let role = defaultRole(options)
  result = PRstSharedState(
      options: options,
      hasToc: hasToc,
      currRole: role,
      currRoleKind: whichRoleAux(role))
  result.msgHandler =
    if not isNil(msgHandler): msgHandler
    else: defaultMsgHandler
  result.findFile =
    if not isNil(findFile): findFile
    else: defaultFindFile
  result.findRefFile =
    if not isNil(findRefFile): findRefFile
    else: defaultFindRefFile
  setCurrFilename(result, filename)
|
|
|
|
proc curLine(p: RstParser): int =
  ## Line of the current token, offset by the parser's initial line.
  result = p.line + currentTok(p).line
|
|
|
|
proc findRelativeFile(p: RstParser; filename: string): string =
  ## Looks for `filename` next to the file being parsed first; if no such
  ## file exists there, the lookup is delegated to the `findFile` handler.
  let sibling = p.s.currFilename.splitFile.dir / filename
  if fileExists(sibling): sibling
  else: p.s.findFile(filename)
|
|
|
|
proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string) =
  ## Reports `msgKind` with argument `arg` at the position of the current
  ## token (offset by the parser's initial line/column).
  let file = p.s.currFilename
  let line = curLine(p)
  let col = p.col + currentTok(p).col
  p.s.msgHandler(file, line, col, msgKind, arg)
|
|
|
|
proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string) =
  ## Reports a message for the current file at the initial position
  ## (`LineRstInit`, `ColRstInit`).
  let file = s.currFilename
  s.msgHandler(file, LineRstInit, ColRstInit, msgKind, arg)

proc rstMessage(s: PRstSharedState, msgKind: MsgKind, arg: string;
                line, col: int) =
  ## Reports a message for the current file at the given `line`/`col`,
  ## which are passed to the handler unchanged.
  let file = s.currFilename
  s.msgHandler(file, line, col, msgKind, arg)

proc rstMessage(s: PRstSharedState, filename: string, msgKind: MsgKind,
                arg: string) =
  ## Reports a message for an explicitly named file at the initial
  ## position (`LineRstInit`, `ColRstInit`).
  s.msgHandler(filename, LineRstInit, ColRstInit, msgKind, arg)
|
|
|
|
proc rstMessage*(filenames: RstFileTable, f: MsgHandler,
                 info: TLineInfo, msgKind: MsgKind, arg: string) =
  ## Print warnings using `info`, i.e. in 2nd-pass warnings for
  ## footnotes/substitutions/references or from ``rstgen.nim``.
  let
    file = filenames.getFilename(info.fileIndex)
    line = info.line.int
    col = info.col.int
  f(file, line, col, msgKind, arg)
|
|
|
|
proc rstMessage(p: RstParser, msgKind: MsgKind, arg: string, line, col: int) =
  ## Reports a message at `line`/`col`, both taken relative to the
  ## parser's initial line/column.
  let file = p.s.currFilename
  p.s.msgHandler(file, line + p.line, col + p.col, msgKind, arg)
|
|
|
|
proc rstMessage(p: RstParser, msgKind: MsgKind) =
  ## Reports `msgKind` at the current token, using the token's own text
  ## as the message argument.
  let file = p.s.currFilename
  let line = curLine(p)
  let col = p.col + currentTok(p).col
  p.s.msgHandler(file, line, col, msgKind, currentTok(p).symbol)
|
|
|
|
# Functions `isPureRst` & `stopOrWarn` address differences between
|
|
# Markdown and RST:
|
|
# * Markdown always tries to continue working. If it is really impossible
|
|
# to parse a markup element, its proc just returns `nil` and parsing
|
|
# continues for it as for normal text paragraph.
|
|
# The downside is that real mistakes/typos are often silently ignored.
|
|
# The same applies to legacy `RstMarkdown` mode for nimforum.
|
|
# * RST really signals errors. The downside is that it's more intrusive -
|
|
# the user must escape special syntax with \ explicitly.
|
|
#
|
|
# TODO: we need to apply this strategy to all markup elements eventually.
|
|
|
|
func isPureRst(p: RstParser): bool =
  ## true when Markdown support is not enabled at all
  not (roSupportMarkdown in p.s.options)

func isRst(p: RstParser): bool =
  ## true when Markdown is not the preferred syntax
  not (roPreferMarkdown in p.s.options)

func isMd(p: RstParser): bool =
  ## true when Markdown is the preferred syntax
  p.s.options.contains(roPreferMarkdown)

func isMd(s: PRstSharedState): bool =
  ## true when Markdown is the preferred syntax
  s.options.contains(roPreferMarkdown)
|
|
|
|
proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string) =
  ## Reports `errorType` in pure RST mode; when Markdown support is on,
  ## the message is downgraded to the `mwRstStyle` warning.
  var kind = mwRstStyle
  if isPureRst(p):
    kind = errorType
  rstMessage(p, kind, arg)
|
|
|
|
proc stopOrWarn(p: RstParser, errorType: MsgKind, arg: string, line, col: int) =
  ## Same as the 3-argument `stopOrWarn`, but reports at the given
  ## `line`/`col` instead of at the current token.
  var kind = mwRstStyle
  if isPureRst(p):
    kind = errorType
  rstMessage(p, kind, arg, line, col)
|
|
|
|
proc currInd(p: RstParser): int =
  ## Indentation on top of the indentation stack.
  p.indentStack[^1]

proc pushInd(p: var RstParser, ind: int) =
  ## Pushes a new indentation level.
  add(p.indentStack, ind)

proc popInd(p: var RstParser) =
  ## Drops the top indentation level; the bottom entry is never removed.
  if p.indentStack.len > 1:
    discard p.indentStack.pop()
|
|
|
|
# Working with indentation in rst.nim
|
|
# -----------------------------------
|
|
#
|
|
# Every line break has an associated tkIndent.
|
|
# The tokenizer writes back the first column of next non-blank line
|
|
# in all preceding tkIndent tokens to the `ival` field of tkIndent.
|
|
#
|
|
# RST document is separated into body elements (B.E.), every of which
|
|
# has a dedicated handler proc (or block of logic when B.E. is a block quote)
|
|
# that should follow the next rule:
|
|
# Every B.E. handler proc should finish at tkIndent (newline)
|
|
# after its B.E. finishes.
|
|
# Then its callers (which is `parseSection` or another B.E. handler)
|
|
# check for tkIndent ival (without necessity to advance `p.idx`)
|
|
# and decide themselves whether they continue processing or also stop.
|
|
#
|
|
# An example::
|
|
#
|
|
#  L    RST text fragment  indentation
#    +--------------------+
#  1 |                    | <- (empty line at the start of file) no tokens
#  2 |First paragraph.    | <- tkIndent has ival=0, and next tkWord has col=0
#  3 |                    | <- tkIndent has ival=0
#  4 |* bullet item and   | <- tkIndent has ival=0, and next tkPunct has col=0
#  5 |  its continuation  | <- tkIndent has ival=2, and next tkWord has col=2
#  6 |                    | <- tkIndent has ival=4
#  7 |    Block quote     | <- tkIndent has ival=4, and next tkWord has col=4
#  8 |                    | <- tkIndent has ival=0
#  9 |                    | <- tkIndent has ival=0
#  10|Final paragraph     | <- tkIndent has ival=0, and tkWord has col=0
#    +--------------------+
#  C:01234
|
|
#
|
|
# Here parser starts with initial `indentStack=[0]` and then calls the
|
|
# 1st `parseSection`:
|
|
#
|
|
# - `parseSection` calls `parseParagraph` and "First paragraph" is parsed
|
|
# - bullet list handler is started at reaching ``*`` (L4 C0), it
|
|
# starts bullet item logic (L4 C2), which calls `pushInd(p, ind=2)`,
|
|
# then calls `parseSection` (2nd call, nested) which parses
|
|
# paragraph "bullet list and its continuation" and then starts
|
|
# a block quote logic (L7 C4).
|
|
#   The block quote logic calls `pushInd(p, ind=4)` and
|
|
# calls `parseSection` again, so a (simplified) sequence of calls now is::
|
|
#
|
|
# parseSection -> parseBulletList ->
|
|
# parseSection (+block quote logic) -> parseSection
|
|
#
|
|
# 3rd `parseSection` finishes, block quote logic calls `popInd(p)`,
|
|
# it returns to bullet item logic, which sees that next tkIndent has
|
|
# ival=0 and stops there since the required indentation for a bullet item
|
|
# is 2 and 0<2; the bullet item logic calls `popInd(p)`.
|
|
# Then bullet list handler checks that next tkWord (L10 C0) has the
|
|
# right indentation but does not have ``*`` so stops at tkIndent (L10).
|
|
# - 1st `parseSection` invocation calls `parseParagraph` and the
|
|
# "Final paragraph" is parsed.
|
|
#
|
|
# If a B.E. handler has advanced `p.idx` past tkIndent to check
|
|
# whether it should continue its processing or not, and decided not to,
|
|
# then this B.E. handler should step back (e.g. do `dec p.idx`).
|
|
|
|
proc initParser(p: var RstParser, sharedState: PRstSharedState) =
  ## Resets `p` to the start of a standalone text: no tokens, position at
  ## `LineRstInit`/`ColRstInit`, a single indentation level 0, and
  ## `sharedState` attached.
  p.s = sharedState
  p.line = LineRstInit
  p.col = ColRstInit
  p.idx = 0
  p.tok = @[]
  p.indentStack = @[0]
|
|
|
|
proc addNodesAux(n: PRstNode, result: var string) =
  ## Appends the text of all leaves below `n` (depth-first, in order) to
  ## `result`; `nil` nodes contribute nothing.
  if n.isNil: return
  if n.kind != rnLeaf:
    for child in n.sons:
      addNodesAux(child, result)
  else:
    result.add(n.text)
|
|
|
|
proc addNodes(n: PRstNode): string =
  ## Concatenated text of all leaf nodes below `n` (empty for `nil`).
  addNodesAux(n, result)
|
|
|
|
proc linkName(n: PRstNode): string =
  ## Returns a normalized reference name, see:
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names
  ##
  ## Here: the leaf text of `n`, lower-cased (ASCII only).
  result = toLowerAscii(addNodes(n))
|
|
|
|
proc rstnodeToRefnameAux(n: PRstNode, r: var string, b: var bool) =
  ## Appends to `r` the reference name built from the leaf text below `n`:
  ## * letters are lower-cased (ASCII), bytes >= 128 are kept as is;
  ## * a digit that would start the name is prefixed with `Z`;
  ## * the punctuation characters listed below are spelled out as words;
  ## * any other character only raises `b` (once `r` is non-empty), which
  ##   makes a single `-` precede the next emitted piece.
  template flushDash() =
    # emit the pending separator, if any
    if b:
      r.add('-')
      b = false

  template special(s) =
    flushDash()
    r.add(s)

  if n == nil: return
  if n.kind != rnLeaf:
    for i in 0 ..< n.len: rstnodeToRefnameAux(n.sons[i], r, b)
    return
  for ch in n.text:
    case ch
    of '0'..'9':
      flushDash()
      if r.len == 0: r.add('Z')
      r.add(ch)
    of 'a'..'z', '\128'..'\255':
      special(ch)
    of 'A'..'Z':
      special(chr(ord(ch) - ord('A') + ord('a')))
    of '$': special "dollar"
    of '%': special "percent"
    of '&': special "amp"
    of '^': special "roof"
    of '!': special "emark"
    of '?': special "qmark"
    of '*': special "star"
    of '+': special "plus"
    of '-': special "minus"
    of '/': special "slash"
    of '\\': special "backslash"
    of '=': special "eq"
    of '<': special "lt"
    of '>': special "gt"
    of '~': special "tilde"
    of ':': special "colon"
    of '.': special "dot"
    of '@': special "at"
    of '|': special "bar"
    else:
      if r.len > 0: b = true
|
|
|
|
proc rstnodeToRefname(n: PRstNode): string =
  ## Builds the reference name for the text of `n`
  ## (see `rstnodeToRefnameAux` for the rules).
  var pendingDash = false
  rstnodeToRefnameAux(n, result, pendingDash)
|
|
|
|
proc findSub(s: PRstSharedState, n: PRstNode): int =
  ## Index in `s.subs` of the substitution whose key is the text of `n`,
  ## or -1 if there is none. An exact match wins over a match found with
  ## `cmpIgnoreStyle`.
  let wanted = addNodes(n)
  result = -1
  # the spec says: if no exact match, try one without case distinction:
  for i in 0 ..< s.subs.len:
    if s.subs[i].key == wanted:
      return i
  for i in 0 ..< s.subs.len:
    if cmpIgnoreStyle(wanted, s.subs[i].key) == 0:
      return i
|
|
|
|
proc lineInfo(p: RstParser, iTok: int): TLineInfo =
  ## Location of token number `iTok` in the current file: the token's
  ## line/column offset by the parser's initial line/column.
  let token = p.tok[iTok]
  result = TLineInfo(
      col: int16(p.col + token.col),
      line: uint16(p.line + token.line),
      fileIndex: p.s.currFileIdx)
|
|
|
|
proc lineInfo(p: RstParser): TLineInfo =
  ## Location of the current token.
  result = lineInfo(p, p.idx)

# TODO: we need this simplification because we don't preserve exact starting
# token of currently parsed element:
proc prevLineInfo(p: RstParser): TLineInfo =
  ## Location of the token right before the current one.
  result = lineInfo(p, p.idx - 1)
|
|
|
|
proc setSub(p: var RstParser, key: string, value: PRstNode) =
  ## Defines substitution `key`: an existing entry with exactly this key
  ## gets its value replaced, otherwise a new entry is appended (located
  ## at the previous token).
  for sub in p.s.subs.mitems:
    if sub.key == key:
      sub.value = value
      return
  p.s.subs.add(Substitution(key: key, value: value, info: prevLineInfo(p)))
|
|
|
|
proc setRef(p: var RstParser, key: string, value: PRstNode,
            refType: SubstitutionKind) =
  ## Defines reference `key`. If an entry with this key already exists its
  ## value is replaced (its kind and location are kept), and
  ## `mwRedefinitionOfLabel` is reported when the old and the new text
  ## differ. Otherwise a new entry of kind `refType` is appended.
  for i in 0 .. p.s.refs.high:
    if p.s.refs[i].key != key:
      continue
    if p.s.refs[i].value.addNodes != value.addNodes:
      rstMessage(p, mwRedefinitionOfLabel, key)
    p.s.refs[i].value = value
    return
  p.s.refs.add(Substitution(kind: refType, key: key, value: value,
                            info: prevLineInfo(p)))
|
|
|
|
proc findRef(s: PRstSharedState, key: string): seq[Substitution] =
  ## All references whose key is exactly `key`, in definition order.
  for entry in s.refs:
    if entry.key == key:
      result.add entry
|
|
|
|
# Ambiguity in links: we don't follow procedure of removing implicit targets
|
|
# defined in https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#implicit-hyperlink-targets
|
|
# Instead we just give explicit links a higher priority than to implicit ones
|
|
# and report ambiguities as warnings. Hopefully it is easy to remove
|
|
# ambiguities manually. Nim auto-generated links from ``docgen.nim``
|
|
# have lowest priority: 1 (for procs) and below for other symbol types.
|
|
|
|
proc refPriority(k: SubstitutionKind): int =
  ## Priority of a reference of kind `k` (larger number = preferred when
  ## a link is ambiguous).
  const priorities: array[SubstitutionKind, int] = [
    rstSubstitution: 8,
    hyperlinkAlias: 7,
    implicitHyperlinkAlias: 2]
  result = priorities[k]
|
|
|
|
proc internalRefPriority(k: RstAnchorKind): int =
  ## Priority of an RST anchor of kind `k` (larger number = preferred
  ## when a link is ambiguous).
  const priorities: array[RstAnchorKind, int] = [
    manualDirectiveAnchor: 6,
    manualInlineAnchor: 5,
    footnoteAnchor: 4,
    headlineAnchor: 3]
  result = priorities[k]
|
|
|
|
proc `$`(subst: AnchorSubst): string = # for debug
  ## Debug rendering: kind, priority and the kind-specific detail
  ## (anchor type for RST anchors, language symbol for Nim anchors).
  var detail: string
  case subst.kind
  of arInternalRst:
    detail = "type=" & $subst.anchorType
  of arExternalRst:
    detail = "type=" & $subst.anchorTypeExt
  of arNim:
    detail = "langsym=" & $subst.langSym
  result = "(kind=$1, priority=$2, $3)" % [
      $subst.kind, $subst.priority, detail]
|
|
|
|
proc addAnchorRst(p: var RstParser, name: string, target: PRstNode,
                  anchorType: RstAnchorKind) =
  ## Associates node `target` (which has field `anchor`) with an
  ## alias `name` and updates the corresponding aliases in `p.curAnchors`.
  ##
  ## Every accumulated alias is registered as a `manualDirectiveAnchor`
  ## at its own location; `name` (unless empty) is registered with
  ## `anchorType` at the previous token. All entries share the priority
  ## of `anchorType`. `p.curAnchors` is emptied afterwards.
  let prio = internalRefPriority(anchorType)
  for pending in p.curAnchors:
    let entry = AnchorSubst(kind: arInternalRst, target: target,
                            priority: prio, info: pending.info,
                            anchorType: manualDirectiveAnchor)
    p.s.anchors.mgetOrPut(pending.alias, newSeq[AnchorSubst]()).add(entry)
  if name.len != 0:
    let entry = AnchorSubst(kind: arInternalRst, target: target,
                            priority: prio, info: prevLineInfo(p),
                            anchorType: anchorType)
    p.s.anchors.mgetOrPut(name, newSeq[AnchorSubst]()).add(entry)
  p.curAnchors.setLen 0
|
|
|
|
proc addAnchorExtRst(s: var PRstSharedState, key: string, refn: string,
                  anchorType: RstAnchorKind, info: TLineInfo) =
  ## Registers an `arExternalRst` anchor with reference name `refn` in
  ## `s.anchors` under the lower-cased `key`, using the priority of
  ## `anchorType`.
  let entry = AnchorSubst(kind: arExternalRst, refnameExt: refn,
                          priority: internalRefPriority(anchorType),
                          info: info,
                          anchorTypeExt: anchorType)
  s.anchors.mgetOrPut(key.toLowerAscii, newSeq[AnchorSubst]()).add entry
|
|
|
|
proc addAnchorNim*(s: var PRstSharedState, external: bool, refn: string, tooltip: string,
                   langSym: LangSymbol, priority: int,
                   info: TLineInfo, module: FileIndex) =
  ## Adds an anchor `refn`, which follows
  ## the rule `arNim` (i.e. a symbol in ``*.nim`` file).
  ##
  ## The anchor is stored in `s.anchors` under `langSym.name`.
  let entry = AnchorSubst(kind: arNim, external: external, refname: refn,
                          langSym: langSym, tooltip: tooltip,
                          priority: priority, info: info)
  s.anchors.mgetOrPut(langSym.name, newSeq[AnchorSubst]()).add entry
|
|
|
|
proc findMainAnchorNim(s: PRstSharedState, signature: PRstNode,
                       info: TLineInfo):
                      seq[AnchorSubst] =
  ## Looks up the Nim-symbol anchors (`arNim`) that match `signature`.
  ##
  ## Returns an empty seq when `signature` cannot be parsed by
  ## `toLangSymbol` or when nothing is registered under the symbol's name.
  ## Matching anchors are grouped by (symbol kind, module). A group with one
  ## member contributes that member. A group with overloads contributes its
  ## non-group members when `parametersProvided` is set for the parsed
  ## symbol, otherwise only its group anchor (which must exist).
  var wanted: LangSymbol
  try:
    wanted = toLangSymbol(signature)
  except ValueError:  # parsing failed, not a Nim symbol
    return
  let candidates = s.anchors.getOrDefault(wanted.name,
                                          newSeq[AnchorSubst]())
  if candidates.len == 0:
    return
  # Select only groups instead of concrete symbols with overloads; note that
  # the same symbol can be defined in multiple modules and `importdoc`ed:
  type GroupKey = tuple[symKind: string, origModule: string]
  # map (symKind, file) (like "proc", "os.nim") -> found symbols/groups:
  var groups: Table[GroupKey, seq[AnchorSubst]]
  for cand in candidates:
    if cand.kind != arNim or not match(cand.langSym, wanted):
      continue
    let key: GroupKey = (cand.langSym.symKind, getModule(s, cand))
    groups.mgetOrPut(key, newSeq[AnchorSubst]()).add cand
  for members in groups.values:
    if members.len == 1:
      result.add members[0]
    elif wanted.parametersProvided:
      # > 1 member, i.e. overloads: there are non-group signatures,
      # select only them
      for member in members:
        if not member.langSym.isGroup:
          result.add member
    else:
      # when there are many overloads a link like foo_ points to all
      # of them, so selecting the group
      var groupFound = false
      for member in members:
        if member.langSym.isGroup:
          result.add member
          groupFound = true
          break
      doAssert(groupFound,
               "docgen has not generated the group for $1 (file $2)" % [
               wanted.name, getModule(s, members[0]) ])
|
|
|
|
proc findMainAnchorRst(s: PRstSharedState, linkText: string, info: TLineInfo):
                      seq[AnchorSubst] =
  ## Returns the RST anchors (`arInternalRst` and `arExternalRst`) that are
  ## registered in `s.anchors` under the lower-cased `linkText`.
  let wanted = linkText.toLowerAscii
  for cand in s.anchors.getOrDefault(wanted, newSeq[AnchorSubst]()):
    case cand.kind
    of arInternalRst, arExternalRst: result.add cand
    else: discard
|
|
|
|
proc addFootnoteNumManual(p: var RstParser, num: int) =
  ## Adds a manually-numbered footnote `num` to `p.s.footnotes`.
  ## If some footnote already has this number, `mwRedefinitionOfLabel` is
  ## reported and nothing is added.
  let taken = p.s.footnotes.anyIt(it.number == num)
  if taken:
    rstMessage(p, mwRedefinitionOfLabel, $num)
    return
  p.s.footnotes.add((fnManualNumber, num, -1, -1, $num))
|
|
|
|
proc addFootnoteNumAuto(p: var RstParser, label: string) =
  ## Adds an auto-numbered footnote.
  ## Empty label [#] means it'll be resolved by the occurrence: the current
  ## location is appended to `p.s.lineFootnoteNum` and the new length of that
  ## list is stored as the occurrence index.
  ## For a labelled footnote [#label] a repeated `label` is reported as
  ## `mwRedefinitionOfLabel` and nothing is added.
  if label.len == 0:  # simple auto-numbered [#]
    p.s.lineFootnoteNum.add lineInfo(p)
    let occurrence = p.s.lineFootnoteNum.len
    p.s.footnotes.add((fnAutoNumber, -1, occurrence, -1, label))
    return
  # auto-numbered with label [#label]
  for known in p.s.footnotes:
    if known.label == label:
      rstMessage(p, mwRedefinitionOfLabel, label)
      return
  p.s.footnotes.add((fnAutoNumberLabel, -1, -1, -1, label))
|
|
|
|
proc addFootnoteSymAuto(p: var RstParser) =
  ## Adds an auto-symbol footnote: remembers the current location in
  ## `p.s.lineFootnoteSym` and stores the new length of that list as the
  ## occurrence index of the footnote.
  p.s.lineFootnoteSym.add lineInfo(p)
  let occurrence = p.s.lineFootnoteSym.len
  p.s.footnotes.add((fnAutoSymbol, -1, -1, occurrence, ""))
|
|
|
|
proc orderFootnotes(s: PRstSharedState) =
  ## Numerates auto-numbered footnotes taking into account that all
  ## manually numbered ones always have preference.
  ## Saves the result back to `s.footnotes`.
  ##
  ## Footnotes that are neither manually numbered nor auto-numbered are
  ## relabelled from their occurrence index using ASCII symbols and appended
  ## after the numbered ones.

  proc listFootnotes(locations: seq[TLineInfo]): string =
    # Renders "<count> (lines <l1>, <l2>, ...)"; lines are prefixed with the
    # file name only when more than one file is known.
    var lines: seq[string]
    for info in locations:
      var entry = ""
      if s.filenames.len > 1:
        entry = getFilename(s.filenames, info.fileIndex) & ":"
      entry.add $info.line
      lines.add entry
    result = $lines.len & " (lines " & join(lines, ", ") & ")"

  # Report an error if found any mismatch in number of automatic footnotes
  if s.lineFootnoteNum.len != s.lineFootnoteNumRef.len:
    let counts = "$1 != $2" % [listFootnotes(s.lineFootnoteNum),
                               listFootnotes(s.lineFootnoteNumRef)]
    rstMessage(s, meFootnoteMismatch,
               counts & " for auto-numbered footnotes")
  if s.lineFootnoteSym.len != s.lineFootnoteSymRef.len:
    let counts = "$1 != $2" % [listFootnotes(s.lineFootnoteSym),
                               listFootnotes(s.lineFootnoteSymRef)]
    rstMessage(s, meFootnoteMismatch,
               counts & " for auto-symbol footnotes")

  var manual, autoNumbered, autoSymbols: seq[FootnoteSubst]
  for fs in s.footnotes:
    case fs.kind
    of fnManualNumber: manual.add fs
    of fnAutoNumber, fnAutoNumberLabel: autoNumbered.add fs
    else: autoSymbols.add fs

  var ordered: seq[FootnoteSubst]
  if autoNumbered.len == 0:
    ordered = manual
  else:
    # fill gaps between manually numbered footnotes in ascending order
    manual.sort()  # all kinds are equal here, so this orders by number
    var chain = initSinglyLinkedList[FootnoteSubst]()
    for fs in manual: chain.append(fs)
    var firstAuto = 0
    if chain.head == nil or chain.head.value.number != 1:
      # no manual footnote [1], start numeration from 1 for auto-numbered
      let first = autoNumbered[0]
      chain.prepend (first.kind, 1, first.autoNumIdx, -1, first.label)
      firstAuto = 1
    var cur = chain.head
    # go simultaneously through `autoNumbered` and `chain` looking for gaps
    for idx in firstAuto .. autoNumbered.high:
      let pending = autoNumbered[idx]
      var nxt = cur.next
      while nxt != nil and nxt.value.number - cur.value.number <= 1:
        cur = nxt
        nxt = cur.next
      let numbered: FootnoteSubst = (pending.kind, cur.value.number + 1,
                                     pending.autoNumIdx, -1, pending.label)
      if nxt == nil:  # no gap found, just append
        chain.append numbered
        cur = chain.tail
      else:
        # gap found, insert a new node between `cur` and `nxt`:
        let node = newSinglyLinkedNode(numbered)
        cur.next = node
        node.next = nxt
        cur = node
    ordered = chain.toSeq

  # we use ASCII symbols instead of those recommended in RST specification:
  const footnoteAutoSymbols = ["*", "^", "+", "=", "~", "$", "@", "%", "&"]
  for fs in autoSymbols:
    # assignment order: *, **, ***, ^, ^^, ^^^, ... &&&, ****, *****, ...
    let zeroBased = fs.autoSymIdx - 1
    let symbolNum = (zeroBased div 3) mod footnoteAutoSymbols.len
    let nSymbols = (1 + zeroBased mod 3) +
                   3 * (zeroBased div (3 * footnoteAutoSymbols.len))
    let symLabel = footnoteAutoSymbols[symbolNum].repeat(nSymbols)
    ordered.add((fs.kind, -1, -1, fs.autoSymIdx, symLabel))

  s.footnotes = ordered
|
|
|
|
proc getFootnoteNum(s: PRstSharedState, label: string): int =
  ## Gets number from label. Must be called after `orderFootnotes`.
  ## Returns -1 when no footnote has this `label`.
  for entry in s.footnotes:
    if entry.label == label:
      return entry.number
  result = -1
|
|
|
|
proc getFootnoteNum(s: PRstSharedState, order: int): int =
  ## Gets number from occurrence. Must be called after `orderFootnotes`.
  ## Returns -1 when no footnote has `autoNumIdx` equal to `order`.
  for entry in s.footnotes:
    if entry.autoNumIdx == order:
      return entry.number
  result = -1
|
|
|
|
proc getAutoSymbol(s: PRstSharedState, order: int): string =
  ## Gets symbol from occurrence of auto-symbol footnote.
  ## Returns "???" when no footnote has `autoSymIdx` equal to `order`.
  for entry in s.footnotes:
    if entry.autoSymIdx == order:
      return entry.label
  result = "???"
|
|
|
|
proc newRstNodeA(p: var RstParser, kind: RstNodeKind): PRstNode =
  ## Creates a node of `kind` and consumes the current anchors: when
  ## `p.curAnchors` is not empty, the node gets the anchor of the first
  ## pending entry and all pending aliases are registered for it via
  ## `addAnchorRst`.
  result = newRstNode(kind)
  if p.curAnchors.len == 0:
    return
  result.anchor = p.curAnchors[0].anchor
  addAnchorRst(p, "", result, manualDirectiveAnchor)
|
|
|
|
# Shorthand for `newRstLeaf(s)`.
template newLeaf(s: string): PRstNode =
  newRstLeaf(s)

proc newLeaf(p: var RstParser): PRstNode =
  ## Creates a leaf from the symbol of the current token; the parser
  ## position is not changed.
  newLeaf(currentTok(p).symbol)
|
|
|
|
proc validRefnamePunct(x: string): bool =
  ## Tells whether `x` is exactly one of the punctuation characters that
  ## may occur inside a reference name, see
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#reference-names
  const refnamePunct = {'-', '_', '.', ':', '+'}
  if x.len != 1:
    return false
  result = x[0] in refnamePunct
|
|
|
|
func getRefnameIdx(p: RstParser, startIdx: int): int =
  ## Gets last token index of a refname ("word" in RST terminology):
  ##
  ##   reference names are single words consisting of alphanumerics plus
  ##   isolated (no two adjacent) internal hyphens, underscores, periods,
  ##   colons and plus signs; no whitespace or other characters are allowed.
  ##
  ## Refnames are used for:
  ## - reference names
  ## - role names
  ## - directive names
  ## - footnote labels
  ##
  ## Returns `startIdx - 1` when the token at `startIdx` is not a word.
  # TODO: use this func in all other relevant places
  if p.tok[startIdx].kind != tkWord:
    return startIdx - 1
  var nextIdx = startIdx + 1
  # keep swallowing "<punct><word>" pairs
  while p.tok[nextIdx].kind == tkPunct and
      validRefnamePunct(p.tok[nextIdx].symbol) and
      p.tok[nextIdx+1].kind == tkWord:
    inc nextIdx, 2
  result = nextIdx - 1
|
|
|
|
func getRefname(p: RstParser, startIdx: int): (string, int) =
  ## Returns the refname that starts at token `startIdx` (the concatenated
  ## token symbols) together with the index of its last token as computed
  ## by `getRefnameIdx`.
  let lastIdx = getRefnameIdx(p, startIdx)
  var name = ""
  for tokIdx in startIdx .. lastIdx:
    name.add p.tok[tokIdx].symbol
  result = (name, lastIdx)
|
|
|
|
proc getReferenceName(p: var RstParser, endStr: string): PRstNode =
  ## Collects word, whitespace, "other" and punctuation tokens as leaves of
  ## a new `rnInner` node until a punctuation token equal to `endStr` is
  ## met; that terminator is consumed. Any other token kind stops the scan
  ## with an `meExpected` message and is not consumed.
  result = newRstNode(rnInner)
  while true:
    let kind = currentTok(p).kind
    if kind == tkPunct and currentTok(p).symbol == endStr:
      inc p.idx  # consume the terminator
      break
    if kind notin {tkWord, tkOther, tkWhite, tkPunct}:
      rstMessage(p, meExpected, endStr)
      break
    result.add newLeaf(p)
    inc p.idx
|
|
|
|
proc untilEol(p: var RstParser): PRstNode =
  ## Consumes tokens up to (but not including) the next `tkIndent` or
  ## `tkEof` and returns them as leaves of a new `rnInner` node.
  let line = newRstNode(rnInner)
  while true:
    if currentTok(p).kind in {tkIndent, tkEof}:
      break
    line.add newLeaf(p)
    inc p.idx
  result = line
|
|
|
|
proc expect(p: var RstParser, tok: string) =
  ## Consumes the current token when its symbol equals `tok`; otherwise
  ## reports `meExpected` and leaves the position unchanged.
  if currentTok(p).symbol != tok:
    rstMessage(p, meExpected, tok)
  else:
    inc p.idx
|
|
|
|
proc inlineMarkdownEnd(p: RstParser): bool =
  ## Markdown rule for the end of inline markup: the previous token must be
  ## neither an indent nor whitespace.
  ## (For a special case of ` we don't allow spaces surrounding it
  ## unlike original Markdown because this behavior is confusing/useless)
  let before = prevTok(p).kind
  result = before != tkIndent and before != tkWhite
|
|
|
|
proc inlineRstEnd(p: RstParser): bool =
  ## RST rules for the end of inline markup, see
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
  const allowedAfter = {'\'', '\"', ')', ']', '}', '>', '-', '/', '\\', ':',
                        '.', ',', ';', '!', '?', '_'}
  # Rule 2: no whitespace right before the end-string
  if prevTok(p).kind in {tkIndent, tkWhite}:
    return false
  # Rule 7: whitespace, end of input or one of `allowedAfter` must follow
  result = nextTok(p).kind in {tkIndent, tkWhite, tkEof} or
           nextTok(p).symbol[0] in allowedAfter
|
|
|
|
proc isInlineMarkupEnd(p: RstParser, markup: string, exact: bool): bool =
  ## Tells whether the current token ends inline markup `markup`.
  ##
  ## With `exact` the token symbol must equal `markup`; otherwise it only
  ## has to end with it. In the non-exact mode a lone ` preceded by another
  ## ` token is also accepted for markup `` (escaping may have split `` into
  ## 2 tokens). After that the surroundings are checked: always by the RST
  ## rule for `_` and `__`, otherwise by the Markdown rule when
  ## `roPreferMarkdown` is set and by the RST rule when it is not.
  let sym = currentTok(p).symbol
  var tokenFits: bool
  if exact:
    tokenFits = sym == markup
  else:
    tokenFits = sym.endsWith(markup) or
        (markup == "``" and sym == "`" and prevTok(p).symbol == "`")
  if not tokenFits:
    return false
  # surroundings check
  if markup notin ["_", "__"] and roPreferMarkdown in p.s.options:
    result = inlineMarkdownEnd(p)
  else:
    result = inlineRstEnd(p)
|
|
|
|
proc rstRuleSurround(p: RstParser): bool =
  ## RST inline markup rules 4 & 5: when the previous token starts with a
  ## quote or an opening bracket, the next token must not start with the
  ## corresponding closing character. Always `true` at the first token.
  if p.idx <= 0:
    return true
  let opening = prevTok(p).symbol[0]
  let closing =
    case opening
    of '\'', '\"': opening
    of '(': ')'
    of '[': ']'
    of '{': '}'
    of '<': '>'
    else: '\0'
  result = closing == '\0' or nextTok(p).symbol[0] != closing
|
|
|
|
proc inlineMarkdownStart(p: RstParser): bool =
  ## Markdown rule for the start of inline markup: the next token must not
  ## be an indent, whitespace or end of input, and `rstRuleSurround` must
  ## hold (this rst rule is really nice, so it is used in Markdown mode too).
  let textFollows = nextTok(p).kind notin {tkIndent, tkWhite, tkEof}
  result = textFollows and rstRuleSurround(p)
|
|
|
|
proc inlineRstStart(p: RstParser): bool =
  ## RST rules for the start of inline markup, see
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#inline-markup-recognition-rules
  const allowedBefore = {'\'', '\"', '(', '[', '{', '<', '-', '/', ':', '_'}
  # Rule 6: start of input, whitespace or one of `allowedBefore` precedes
  let cleanStart = p.idx == 0 or prevTok(p).kind in {tkIndent, tkWhite} or
                   prevTok(p).symbol[0] in allowedBefore
  if not cleanStart:
    return false
  # Rule 1: the start-string must be followed by non-whitespace
  if nextTok(p).kind in {tkIndent, tkWhite, tkEof}:
    return false
  result = rstRuleSurround(p)
|
|
|
|
proc isInlineMarkupStart(p: RstParser, markup: string): bool =
  ## Tells whether inline markup `markup` starts at the current token.
  ##
  ## The token symbol must equal `markup` (for the 2-token markup _` the
  ## current and the next token are compared). The surroundings are checked
  ## by the RST rule for `_`, `__`, `[` and `|`; for any other markup the
  ## Markdown rule is used when `roPreferMarkdown` is set.
  let tokensFit =
    if markup == "_`":  # _` is a 2 token case
      currentTok(p).symbol == "_" and nextTok(p).symbol == "`"
    else:
      currentTok(p).symbol == markup
  if not tokensFit:
    return false
  # surroundings check
  # Note: we require space/punctuation even before [markdown link](...)
  let rstOnly = markup in ["_", "__", "[", "|"]
  if not rstOnly and roPreferMarkdown in p.s.options:
    result = inlineMarkdownStart(p)
  else:
    result = inlineRstStart(p)
|
|
|
|
proc match(p: RstParser, start: int, expr: string): bool =
  ## Checks whether the tokens beginning at index `start` fit the pattern
  ## `expr`. The "regular expressions" are:
  ##
  ## - special char: exact match of a punctuation/adornment token; a run of
  ##   the same char in `expr` requires a token of that exact length
  ## - 'w': a refname (see `getRefnameIdx`), may span several tokens
  ## - ' ': tkWhite
  ## - 'a': tkAdornment
  ## - 'i': tkIndent
  ## - 'I': tkIndent or tkEof
  ## - 'p': tkPunct
  ## - 'o': tkOther
  ## - 'T': always true
  ## - 'E': whitespace, indent or eof
  ## - 'e': any enumeration sequence or '#' (for enumeration lists)
  ## - 'x': a..z or '#' (for enumeration lists)
  ## - 'n': 0..9 or '#' (for enumeration lists)
  let lastPat = expr.len - 1
  var pat = 0         # position in `expr`
  var tokIdx = start  # position in `p.tok`
  while pat <= lastPat:
    case expr[pat]
    of 'w':
      let refnameEnd = getRefnameIdx(p, tokIdx)
      result = refnameEnd >= tokIdx
      if result: tokIdx = refnameEnd
    of ' ': result = p.tok[tokIdx].kind == tkWhite
    of 'i': result = p.tok[tokIdx].kind == tkIndent
    of 'I': result = p.tok[tokIdx].kind in {tkIndent, tkEof}
    of 'p': result = p.tok[tokIdx].kind == tkPunct
    of 'a': result = p.tok[tokIdx].kind == tkAdornment
    of 'o': result = p.tok[tokIdx].kind == tkOther
    of 'T': result = true
    of 'E': result = p.tok[tokIdx].kind in {tkEof, tkWhite, tkIndent}
    of 'e', 'x', 'n':
      let sym = p.tok[tokIdx].symbol
      result = p.tok[tokIdx].kind == tkWord or sym == "#"
      if result:
        case sym[0]
        of '#': result = true
        of 'a'..'z', 'A'..'Z':
          result = expr[pat] in {'e', 'x'} and sym.len == 1
        of '0'..'9':
          result = expr[pat] in {'e', 'n'} and
                   allCharsInSet(sym, {'0'..'9'})
        else: result = false
    else:
      # a run of identical special chars must match one token exactly
      let special = expr[pat]
      var runLen = 0
      while pat <= lastPat and expr[pat] == special:
        inc pat
        inc runLen
      dec pat
      result = p.tok[tokIdx].kind in {tkPunct, tkAdornment} and
          p.tok[tokIdx].symbol.len == runLen and
          p.tok[tokIdx].symbol[0] == special
    if not result: return
    inc tokIdx
    inc pat
  result = true
|
|
|
|
proc safeProtocol*(linkStr: var string): string =
  ## Returns link's protocol and, if it's not safe, clears `linkStr`.
  ##
  ## A link that does not start with "<identifier>:" is left untouched.
  ## Otherwise only `http`, `https` and `ftp` (compared via `cmpIgnoreCase`)
  ## are accepted, which ensures that it's not 'javascript:' or worse.
  const allowedProtocols = ["http", "https", "ftp"]
  result = ""
  if not scanf(linkStr, "$w:", result):
    return  # no protocol at all
  for allowed in allowedProtocols:
    if cmpIgnoreCase(result, allowed) == 0:
      return  # it's fine
  linkStr = ""
|
|
|
|
proc fixupEmbeddedRef(p: var RstParser, n, a, b: PRstNode): bool =
  ## Splits the sons of `n` at the last "<" (the final son of `n` is never
  ## inspected): the sons before it, minus one separating son whose text
  ## starts with a space, are appended to `a`; the text of the sons after it
  ## is concatenated into a link that is appended to `b` as a single leaf.
  ##
  ## Returns `true` if the link belongs to an allowed protocol; otherwise
  ## `mwBrokenLink` is reported and the leaf added to `b` is empty. An empty
  ## link yields `false` without a message.
  var ltIdx = -1
  for i in countdown(n.len - 2, 0):
    if n.sons[i].text == "<":
      ltIdx = i
      break
  let spaceBeforeLt = ltIdx > 0 and n.sons[ltIdx - 1].text[0] == ' '
  let titleEnd = if spaceBeforeLt: ltIdx - 2 else: ltIdx - 1
  for i in 0 .. titleEnd:
    a.add n.sons[i]
  var linkStr = ""
  for i in ltIdx + 1 .. n.len - 2:
    linkStr.add n.sons[i].addNodes
  if linkStr.len > 0:
    let protocol = safeProtocol(linkStr)
    if linkStr.len > 0:
      result = true
    else:
      rstMessage(p, mwBrokenLink, protocol,
                 p.tok[p.idx-3].line, p.tok[p.idx-3].col)
  b.add newLeaf(linkStr)
|
|
|
|
proc whichRole(p: RstParser, sym: string): RstNodeKind =
  ## Maps role name `sym` to a node kind via `whichRoleAux`; reports
  ## `mwUnsupportedLanguage` when the outcome is `rnUnknownRole`.
  let role = whichRoleAux(sym)
  if role == rnUnknownRole:
    rstMessage(p, mwUnsupportedLanguage, sym)
  result = role
|
|
|
|
proc toInlineCode(n: PRstNode, language: string): PRstNode =
  ## Creates rnInlineCode and attaches `n` contents as code (in 3rd son).
  ##
  ## The 1st son is an `rnDirArg` with the language name (`cpp` and `csharp`
  ## are spelled `c++` and `c#`), the 2nd son is nil. All sons of `n` are
  ## asserted to be leaves; their texts are joined into one leaf.
  let lang =
    case language
    of "cpp": "c++"
    of "csharp": "c#"
    else: language
  var code = ""
  for son in n.sons:
    assert son.kind == rnLeaf
    code.add son.text
  let langArg = newRstNode(rnDirArg)
  langArg.add newLeaf(lang)
  let literal = newRstNode(rnLiteralBlock)
  literal.add newLeaf(code)
  result = newRstNode(rnInlineCode, info=n.info)
  result.add langArg
  result.add PRstNode(nil)
  result.add literal
|
|
|
|
proc toOtherRole(n: PRstNode, kind: RstNodeKind, roleName: string): PRstNode =
  ## Creates a node of `kind` with 2 sons: an `rnInner` node holding the
  ## sons of `n` and a leaf with `roleName`.
  let content = newRstNode(rnInner, n.sons)
  result = newRstNode(kind, @[content, newLeaf(roleName)])
|
|
|
|
proc parsePostfix(p: var RstParser, n: PRstNode): PRstNode =
  ## Finalizes node `n` that was tentatively determined as interpreted text.
  ##
  ## - Followed by `_` or `__`: a reference. With an embedded `<...>` target
  ##   it becomes a hyperlink (standalone when there is no title) or, if the
  ##   target is rejected by `fixupEmbeddedRef`, plain `rnInner` text;
  ##   otherwise it is an `rnRstRef` that will be resolved later.
  ## - Followed by `:role:`: interpreted text with an explicit role.
  ## - Otherwise: interpreted text with the current default role.

  proc finalizeInterpreted(node: PRstNode, newKind: RstNodeKind,
                           newSons: seq[PRstNode], roleName: string):
                          PRstNode {.nimcall.} =
    # fixes interpreted text (`x` or `y`:role:) to proper internal AST format
    case newKind
    of rnUnknownRole, rnCodeFragment:
      result = node.toOtherRole(newKind, roleName)
    of rnInlineCode:
      result = node.toInlineCode(language=roleName)
    else:
      result = newRstNode(newKind, newSons)

  let isReference = isInlineMarkupEnd(p, "_", exact=true) or
                    isInlineMarkupEnd(p, "__", exact=true)
  if isReference:
    inc p.idx
    let hasEmbeddedTarget =
      p.tok[p.idx-2].symbol == "`" and p.tok[p.idx-3].symbol == ">"
    if not hasEmbeddedTarget:
      # some link that will be resolved in `resolveSubs`
      result = newRstNode(rnRstRef, sons=n.sons, info=n.info)
    else:
      let title = newRstNode(rnInner)
      let target = newRstNode(rnInner)
      if not fixupEmbeddedRef(p, n, title, target):
        # include as plain text, not a link
        result = newRstNode(rnInner, n.sons)
      elif title.len == 0:  # e.g. `<a_named_relative_link>`_
        result = newRstNode(rnStandaloneHyperlink, @[target])
      else:  # e.g. `link title <http://site>`_
        setRef(p, rstnodeToRefname(title), target, implicitHyperlinkAlias)
        result = newRstNode(rnHyperlink, @[title, target])
  elif match(p, p.idx, ":w:"):
    # a role:
    let (roleName, lastIdx) = getRefname(p, p.idx+1)
    let roleKind = whichRole(p, roleName)
    result = n.finalizeInterpreted(roleKind, n.sons, roleName)
    p.idx = lastIdx + 2
  else:
    result = n.finalizeInterpreted(p.s.currRoleKind, n.sons, p.s.currRole)
|
|
|
|
proc matchVerbatim(p: RstParser, start: int, expr: string): int =
  ## Compares `expr` piecewise with the symbols of the tokens beginning at
  ## `start`. Returns the index of the token after the last compared one
  ## when the whole `expr` was covered, 0 otherwise.
  var tokIdx = start
  var covered = 0  # number of chars of `expr` matched so far
  while covered < expr.len and tokIdx < p.tok.len and
      continuesWith(expr, p.tok[tokIdx].symbol, covered):
    inc covered, p.tok[tokIdx].symbol.len
    inc tokIdx
  result = if covered < expr.len: 0 else: tokIdx
|
|
|
|
proc parseSmiley(p: var RstParser): PRstNode =
  ## Tries every entry of `Smilies` at the current position. On the first
  ## match the tokens are consumed and an `rnSmiley` node carrying the
  ## entry's value as text is returned; otherwise the result is nil.
  if currentTok(p).symbol[0] notin SmileyStartChars:
    return nil
  for pattern, icon in items(Smilies):
    let afterMatch = matchVerbatim(p, p.idx, pattern)
    if afterMatch <= 0:
      continue
    p.idx = afterMatch
    result = newRstNode(rnSmiley)
    result.text = icon
    return
|
|
|
|
proc isUrl(p: RstParser, i: int): bool =
  ## Tells whether a standalone URL starts at token `i`: one of the known
  ## schemes followed by `:`, `//` and a word.
  const urlSchemes = ["http", "https", "ftp", "telnet", "file"]
  let hasSeparator = p.tok[i+1].symbol == ":" and p.tok[i+2].symbol == "//"
  result = hasSeparator and p.tok[i+3].kind == tkWord and
           p.tok[i].symbol in urlSchemes
|
|
|
|
proc checkParen(token: Token, parensStack: var seq[char]): bool {.inline.} =
  ## Returns `true` iff `token` is a closing parenthesis for some
  ## previous opening parenthesis saved in `parensStack`.
  ## This is according Markdown balanced parentheses rule
  ## (https://spec.commonmark.org/0.29/#link-destination)
  ## to allow links like
  ## https://en.wikipedia.org/wiki/APL_(programming_language),
  ## we use it for RST also.
  ##
  ## Opening parentheses are pushed onto `parensStack`; a closing one pops
  ## its nearest counterpart together with everything above it.
  if token.kind != tkPunct:
    return false
  let c = token.symbol[0]
  case c
  of '(', '[', '{':  # push
    parensStack.add c
  of ')', ']', '}':  # try pop
    let opening =
      case c
      of ')': '('
      of ']': '['
      else: '{'
    # a case like ([) inside a link is allowed and [ is also `pop`ed:
    for depth in countdown(parensStack.high, 0):
      if parensStack[depth] == opening:
        parensStack.setLen depth
        return true
  else:
    discard
  result = false
|
|
|
|
proc parseUrl(p: var RstParser): PRstNode =
  ## Parses a standalone hyperlink, see
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks
  ##
  ## The URL spans all consecutive word/punctuation/"other" tokens; trailing
  ## punctuation except `/` is cut off unless it is at or before the last
  ## parenthesis that closed a balanced pair.
  var scanIdx = p.idx
  var lastClosedParen = p.idx - 1  # for balanced parens rule
  var openParens: seq[char]
  while p.tok[scanIdx].kind in {tkWord, tkPunct, tkOther}:
    if checkParen(p.tok[scanIdx], openParens):
      lastClosedParen = scanIdx
    inc scanIdx
  var endIdx = scanIdx - 1
  # standalone URL can not end with punctuation in RST
  while endIdx > lastClosedParen and p.tok[endIdx].kind == tkPunct and
      p.tok[endIdx].symbol != "/":
    dec endIdx
  var url = ""
  for tokIdx in p.idx .. endIdx:
    url.add p.tok[tokIdx].symbol
  result = newRstNode(rnStandaloneHyperlink)
  result.add url
  p.idx = endIdx + 1
|
|
|
|
proc parseWordOrRef(p: var RstParser, father: PRstNode) =
  ## Parses a normal word or may be a reference or URL and adds the outcome
  ## to `father`.
  if nextTok(p).kind != tkPunct:  # <- main path, a normal word
    father.add newLeaf(p)
    inc p.idx
    return
  if isUrl(p, p.idx):             # URL http://something
    father.add parseUrl(p)
    return
  # check for reference (probably, long one like some.ref.with.dots_ )
  let startIdx = p.idx
  var isReference = false
  inc p.idx
  while currentTok(p).kind in {tkWord, tkPunct}:
    if currentTok(p).kind == tkPunct:
      if isInlineMarkupEnd(p, "_", exact=true):
        isReference = true
        break
      if not validRefnamePunct(currentTok(p).symbol):
        break
    inc p.idx
  if isReference:
    let reference = newRstNode(rnRstRef, info=lineInfo(p, startIdx))
    for tokIdx in startIdx ..< p.idx:
      reference.add newLeaf(p.tok[tokIdx].symbol)
    father.add reference
    inc p.idx  # skip final _
  else:  # 1 normal word
    father.add newLeaf(p.tok[startIdx].symbol)
    p.idx = startIdx + 1
|
|
|
|
proc parseBackslash(p: var RstParser, father: PRstNode) =
  ## Handles the punctuation token at the current position. A backslash is
  ## dropped and the token after it is added to `father` literally, except
  ## that an escaped whitespace token is removed altogether. Any other
  ## punctuation token is added as is.
  assert(currentTok(p).kind == tkPunct)
  if currentTok(p).symbol != "\\":
    father.add newLeaf(p)
    inc p.idx
    return
  # XXX: Unicode?
  inc p.idx  # skip the backslash itself
  case currentTok(p).kind
  of tkWhite:
    inc p.idx
  of tkEof:
    father.add newLeaf(p)  # stay at the end of input
  else:
    father.add newLeaf(p)
    inc p.idx
|
|
|
|
proc parseUntil(p: var RstParser, father: PRstNode, postfix: string,
                interpretBackslash: bool) =
  ## Skips the current (opening) token and adds the following tokens to
  ## `father` as leaves until the inline markup end `postfix` is found; the
  ## closing token is consumed.
  ##
  ## Whitespace and single line breaks are added as one space. For postfix
  ## "`" the sequences \\ and \` are handled specially; for other postfixes
  ## backslashes are processed by `parseBackslash` only when
  ## `interpretBackslash` is set. When a blank line (2 indents in a row) or
  ## the end of input is met before `postfix`, `meExpected` is reported at
  ## the location of the opening token and parsing stops.
  let
    line = currentTok(p).line
    col = currentTok(p).col
  inc p.idx
  while true:
    case currentTok(p).kind
    of tkPunct:
      if isInlineMarkupEnd(p, postfix, exact=false):
        let l = currentTok(p).symbol.len
        if l > postfix.len:
          # handle cases like *emphasis with stars****. (It's valid RST!)
          father.add newLeaf(currentTok(p).symbol[0 ..< l - postfix.len])
        elif postfix == "``" and currentTok(p).symbol == "`" and
            prevTok(p).symbol == "`":
          # handle cases like ``literal\`` - delete ` already added after \
          father.sons.setLen(father.sons.len - 1)
        inc p.idx
        break
      else:
        if postfix == "`":
          if currentTok(p).symbol == "\\":
            if nextTok(p).symbol == "\\":
              father.add newLeaf("\\")
              father.add newLeaf("\\")
              inc p.idx, 2
            elif nextTok(p).symbol == "`":  # escape `
              father.add newLeaf("`")
              inc p.idx, 2
            else:
              father.add newLeaf("\\")
              inc p.idx
          else:
            father.add(newLeaf(p))
            inc p.idx
        else:
          if interpretBackslash:
            parseBackslash(p, father)
          else:
            father.add(newLeaf(p))
            inc p.idx
    of tkAdornment, tkWord, tkOther:
      father.add(newLeaf(p))
      inc p.idx
    of tkIndent:
      father.add newLeaf(" ")
      inc p.idx
      if currentTok(p).kind == tkIndent:
        rstMessage(p, meExpected, postfix, line, col)
        break
    of tkWhite:
      father.add newLeaf(" ")
      inc p.idx
    else:
      rstMessage(p, meExpected, postfix, line, col)
      # End of input: nothing more can be consumed. Stop here (like the
      # blank-line case above) so that a message handler which returns
      # instead of raising cannot cause an endless loop.
      break
|
|
|
|
proc parseMarkdownCodeblockFields(p: var RstParser): PRstNode =
  ## Parses additional (after language string) code block parameters
  ## in a format *suggested* in the `CommonMark Spec`_ with handling of `"`.
  ##
  ## Returns nil when the line (or the input) ends right at the current
  ## token, otherwise an `rnFieldList` with one `rnField` per
  ## ``name`` / ``name=value`` / ``name="quoted value"`` item. A quoted
  ## value keeps its surrounding double quotes. Parsing stops at the end of
  ## the line or at the end of input.
  if currentTok(p).kind in {tkIndent, tkEof}:
    return nil
  result = newRstNode(rnFieldList)
  # `tkEof` must end the loop too: nothing is consumed at the end of input,
  # so looping on it would never terminate.
  while currentTok(p).kind notin {tkIndent, tkEof}:
    if currentTok(p).kind == tkWhite:
      inc p.idx
    else:
      let field = newRstNode(rnField)
      var fieldName = ""
      while currentTok(p).kind notin {tkWhite, tkIndent, tkEof} and
          currentTok(p).symbol != "=":
        fieldName.add currentTok(p).symbol
        inc p.idx
      field.add(newRstNode(rnFieldName, @[newLeaf(fieldName)]))
      if currentTok(p).kind == tkWhite: inc p.idx
      let fieldBody = newRstNode(rnFieldBody)
      if currentTok(p).symbol == "=":
        inc p.idx
        if currentTok(p).kind == tkWhite: inc p.idx
        var fieldValue = ""
        if currentTok(p).symbol == "\"":
          while true:
            fieldValue.add currentTok(p).symbol
            inc p.idx
            if currentTok(p).kind == tkEof:
              rstMessage(p, meExpected, "\"")
              # unterminated quote: do not step past the end of input if
              # the message handler returns instead of raising
              break
            elif currentTok(p).symbol == "\"":
              fieldValue.add "\""
              inc p.idx
              break
        else:
          while currentTok(p).kind notin {tkWhite, tkIndent, tkEof}:
            fieldValue.add currentTok(p).symbol
            inc p.idx
        fieldBody.add newLeaf(fieldValue)
      field.add(fieldBody)
      result.add(field)
|
|
|
|
proc mayLoadFile(p: RstParser, result: var PRstNode) =
  ## If the code block `result` has a non-empty ``file`` field, replaces its
  ## 3rd son with an `rnLiteralBlock` holding the contents of that file.
  ##
  ## The file name is stripped of whitespace and double quotes and resolved
  ## with `findRelativeFile`. Nothing is loaded when `roSandboxDisabled` is
  ## not set (`meSandboxedDirective` is reported) or when the file cannot be
  ## found (`meCannotOpenFile` is reported). `readFile` may still raise
  ## `IOError` for a path that was found but cannot be read.
  let filename = strip(getFieldValue(result, "file"),
                       chars = Whitespace + {'"'})
  if filename == "":
    return
  if roSandboxDisabled notin p.s.options:
    let tok = p.tok[p.idx-2]
    rstMessage(p, meSandboxedDirective, "file", tok.line, tok.col)
    # The sandbox must hold even if the message handler returns instead of
    # raising: never read a file in this mode.
    return
  let path = p.findRelativeFile(filename)
  if path == "":
    rstMessage(p, meCannotOpenFile, filename)
    return  # there is nothing to read
  var n = newRstNode(rnLiteralBlock)
  n.add newLeaf(readFile(path))
  result.sons[2] = n
|
|
|
|
proc defaultCodeLangNim(p: RstParser, result: var PRstNode) =
  ## Appends the field ``default-language`` with the value ``Nim`` to the
  ## field list (2nd son) of the code block `result`, creating the field
  ## list first when the block didn't have any.
  if result.sons[1].isNil:
    result.sons[1] = newRstNode(rnFieldList)
  assert result.sons[1].kind == rnFieldList
  # Hook the extra field and specify the Nim language as value.
  let langField = newRstNode(rnField, info=lineInfo(p))
  langField.add newRstNode(rnFieldName, @[newLeaf("default-language")])
  langField.add newRstNode(rnFieldBody, @[newLeaf("Nim")])
  result.sons[1].add langField
|
|
|
|
proc parseMarkdownCodeblock(p: var RstParser): PRstNode =
  ## Parses a fenced Markdown code block beginning at the current (fence)
  ## token into an `rnCodeBlock` with 3 sons: the language argument (nil if
  ## no language word follows the fence), the field list and the literal
  ## block with the code.
  ##
  ## The block ends at a punctuation/adornment token that follows the
  ## current token, starts with the fence character and is at least as long
  ## as the opening fence. Reaching the end of input first is reported as
  ## `meMissingClosing`. If there is no language argument and `roNimFile` is
  ## set, the ``default-language`` field is added.
  result = newRstNodeA(p, rnCodeBlock)
  result.sons.setLen(3)
  let startLine = curLine(p)
  let fenceCol = currentTok(p).col
  let fence = currentTok(p).symbol  # usually just ```
  inc p.idx
  result.info = lineInfo(p)
  var langArg: PRstNode = nil
  if currentTok(p).kind == tkWord:
    langArg = newRstNode(rnDirArg)
    langArg.add newLeaf(p)
    inc p.idx
    result.sons[1] = parseMarkdownCodeblockFields(p)
    mayLoadFile(p, result)
  let code = newLeaf("")
  while true:
    if currentTok(p).kind == tkEof:
      rstMessage(p, meMissingClosing,
                 "$1 (started at line $2)" % [fence, $startLine])
      break
    let closingFollows = nextTok(p).kind in {tkPunct, tkAdornment} and
        nextTok(p).symbol[0] == fence[0] and
        nextTok(p).symbol.len >= fence.len
    if closingFollows:
      inc p.idx, 2
      break
    if currentTok(p).kind == tkIndent:
      code.text.add "\n"
      let indent = currentTok(p).ival
      if indent > fenceCol:
        code.text.add " ".repeat(indent - fenceCol)
      elif indent < fenceCol:
        rstMessage(p, mwRstStyle,
                   "unexpected de-indentation in Markdown code block")
    else:
      code.text.add currentTok(p).symbol
    inc p.idx
  result.sons[0] = langArg
  if result.sons[2] == nil:
    let literal = newRstNode(rnLiteralBlock)
    literal.add code
    result.sons[2] = literal
  if result.sons[0].isNil and roNimFile in p.s.options:
    defaultCodeLangNim(p, result)
|
|
|
|
proc parseMarkdownLink(p: var RstParser; father: PRstNode): bool =
  ## Parses Markdown link. If it's Pandoc auto-link then its second
  ## son (target) will be in tokenized format (rnInner with leafs).
  ##
  ## On success the link node is added to `father`, the parser is moved
  ## behind the link and `true` is returned. On failure (end of input or a
  ## blank line inside the brackets, an unsafe protocol, or no `(...)` part
  ## while `roPreferMarkdown` is off) the parser position is unchanged.
  let description = newRstNode(rnInner)
  var pos = p.idx
  var openParens: seq[char]

  template collectUntil(endToken, dest) =
    # Appends the tokens between the current (begin) token and `endToken`
    # to `dest`, honouring balanced parentheses; makes the proc return
    # `false` on end of input or on a blank line.
    openParens.setLen 0
    inc pos  # skip begin token
    while true:
      if p.tok[pos].kind == tkEof: return false
      if p.tok[pos].kind == tkIndent and p.tok[pos+1].kind == tkIndent:
        return false
      let closesParen = checkParen(p.tok[pos], openParens)
      if p.tok[pos].symbol == endToken and not closesParen:
        break
      let piece = if p.tok[pos].kind == tkIndent: " " else: p.tok[pos].symbol
      when dest is string: dest.add piece
      else: dest.add newLeaf(piece)
      inc pos
    inc pos  # skip end token

  collectUntil("]", description)
  if p.tok[pos].symbol == "(":
    var url = ""
    let urlIdx = pos + 1
    collectUntil(")", url)
    # only commit if we detected no syntax error:
    let protocol = safeProtocol(url)
    if url.len == 0:
      rstMessage(p, mwBrokenLink, protocol,
                 p.tok[urlIdx].line, p.tok[urlIdx].col)
      return false
    let hyperlink = newRstNode(rnHyperlink)
    hyperlink.add newLeaf(description.addNodes)
    hyperlink.add url
    father.add hyperlink
    p.idx = pos
    return true
  if roPreferMarkdown notin p.s.options:
    return false
  # Use Pandoc's implicit_header_references extension
  let pandocRef = newRstNode(rnPandocRef)
  pandocRef.add description
  # [description=target] and [description=target][]: the target is the
  # same as the description
  var target = description
  var infoIdx = p.idx + 1
  if p.tok[pos].symbol == "[":
    let explicitTarget = newRstNode(rnInner)
    let targetIdx = pos + 1
    collectUntil("]", explicitTarget)
    if explicitTarget.len != 0:  # [description][target]
      target = explicitTarget
      infoIdx = targetIdx
  pandocRef.add target
  pandocRef.info = lineInfo(p, infoIdx)
  father.add pandocRef
  p.idx = pos
  result = true
|
|
|
|
proc getRstFootnoteType(label: PRstNode): (FootnoteType, int) =
  ## Classifies an RST footnote/citation label and returns its type with the
  ## manual number (-1 when there is none):
  ##
  ## - `#` alone: `fnAutoNumber`; `#` followed by more: `fnAutoNumberLabel`
  ## - `*` alone: `fnAutoSymbol`
  ## - a single leaf accepted by `parseInt`: `fnManualNumber`
  ## - anything else: `fnCitation`
  result = (fnCitation, -1)
  let count = label.sons.len
  if count == 0 or label.sons[0].kind != rnLeaf:
    return
  let first = label.sons[0].text
  if first == "#":
    result[0] = if count == 1: fnAutoNumber else: fnAutoNumberLabel
  elif count == 1:
    if first == "*":
      result = (fnAutoSymbol, -1)
    else:
      try:
        result = (fnManualNumber, parseInt(first))
      except ValueError:
        result = (fnCitation, -1)
|
|
|
|
proc getMdFootnoteType(label: PRstNode): (FootnoteType, int) =
  ## Classifies a Markdown footnote label: a label that consists of a single
  ## leaf accepted by `parseInt` is a manually numbered footnote with that
  ## number; any other label is an auto-numbered footnote with a label
  ## (number -1).
  result = (fnAutoNumberLabel, -1)
  # Only a label that is a number as a whole may be a manual number: the
  # first token alone is not enough, otherwise a label like `1-a` would be
  # taken for footnote 1.
  if label.sons.len == 1:
    try:
      result = (fnManualNumber, parseInt(label.sons[0].text))
    except ValueError:
      result = (fnAutoNumberLabel, -1)
|
|
|
|
proc getFootnoteType(s: PRstSharedState, label: PRstNode): (FootnoteType, int) =
  ## Returns footnote/citation type and manual number (if present),
  ## using the Markdown rules when `isMd(s)` and the RST rules otherwise.
  result =
    if isMd(s): getMdFootnoteType(label)
    else: getRstFootnoteType(label)
|
|
|
|
proc parseRstFootnoteName(p: var RstParser, reference: bool): PRstNode =
  ## Parses footnote/citation label. Precondition: start at `[`.
  ## Label text should be valid ref. name symbol, otherwise nil is returned
  ## and the parser position is not changed.
  ##
  ## On success the label tokens are returned as leaves of an `rnInner` node
  ## and the parser is moved behind the closing `]` (behind `]_` when
  ## `reference` is set; the `_` is required in that case).
  ## `#` is allowed only as the first label character and `*` only as the
  ## whole label, i.e. `[*]`.
  var i = p.idx + 1
  result = newRstNode(rnInner)
  while true:
    if p.tok[i].kind in {tkEof, tkIndent, tkWhite}:
      return nil
    if p.tok[i].kind == tkPunct:
      case p.tok[i].symbol:
      of "]":
        if i > p.idx + 1 and (not reference or
            (p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol == "_")):
          inc i                # skip ]
          if reference: inc i  # skip _
          break  # to succeed, it's a footnote/citation indeed
        else:
          return nil
      of "#":
        if i != p.idx + 1:
          return nil
      of "*":
        # The former check also required `p.tok[i].kind != tkPunct`, which
        # can never hold in this branch, so `*` was accepted anywhere.
        if i != p.idx + 1 or p.tok[i+1].symbol != "]":
          return nil
      else:
        if not validRefnamePunct(p.tok[i].symbol):
          return nil
    result.add newLeaf(p.tok[i].symbol)
    inc i
  p.idx = i
|
|
|
|
proc isMdFootnoteName(p: RstParser, reference: bool): bool =
  ## Pandoc Markdown footnote extension.
  ## True when the tokens at the current position are ``[``, ``^`` and a
  ## word, in that order. (`reference` is not consulted here.)
  let start = p.idx
  if p.tok[start].symbol != "[": return false
  if p.tok[start+1].symbol != "^": return false
  result = p.tok[start+2].kind == tkWord
|
|
|
|
proc parseMdFootnoteName(p: var RstParser, reference: bool): PRstNode =
  ## Parses a Markdown footnote label of the form ``[^name]`` (when
  ## `reference` is true) or ``[^name]:`` (when it is false).
  ## Returns an `rnInner` node with one leaf per label token and advances
  ## `p.idx` past the label; returns nil (without moving `p.idx`) otherwise.
  if not isMdFootnoteName(p, reference):
    return nil
  let first = p.idx + 2  # first token after `[^`
  var last = first
  result = newRstNode(rnInner)
  while p.tok[last].kind in {tkWord, tkOther} or
      validRefnamePunct(p.tok[last].symbol):
    result.add newLeaf(p.tok[last].symbol)
    inc last
  if last == first or p.tok[last].symbol != "]":
    return nil
  if reference:
    p.idx = last + 1  # skip ]
  elif p.tok[last+1].symbol == ":":
    p.idx = last + 2  # skip ]:
  else:
    result = nil
|
|
|
|
proc parseFootnoteName(p: var RstParser, reference: bool): PRstNode =
  ## Parses a footnote/citation label in the syntax selected by `isMd(p)`.
  ## In RST mode the label is attempted only when `[` starts inline markup.
  ## Returns nil when no label is found.
  if isMd(p):
    result = parseMdFootnoteName(p, reference)
  elif isInlineMarkupStart(p, "["):
    result = parseRstFootnoteName(p, reference)
  else:
    result = nil
|
|
|
|
proc isMarkdownCodeBlock(p: RstParser, idx: int): bool =
  ## True when token `idx` is a Markdown code fence: Markdown support is
  ## on, the token is punctuation/adornment of length >= 3, and it starts
  ## with a backtick (or with ``~`` when `roPreferMarkdown` is set).
  let fence = p.tok[idx]
  if roSupportMarkdown notin p.s.options:
    return false
  if fence.kind notin {tkPunct, tkAdornment}:
    return false
  let opener = fence.symbol[0]
  let openerAllowed =
    opener == '`' or (roPreferMarkdown in p.s.options and opener == '~')
  result = openerAllowed and fence.symbol.len >= 3
|
|
|
|
proc isMarkdownCodeBlock(p: RstParser): bool =
  ## Same check as the two-argument overload, applied to the current token.
  result = isMarkdownCodeBlock(p, p.idx)
|
|
|
|
proc parseInline(p: var RstParser, father: PRstNode) =
  ## Parses one inline element starting at the current token and appends
  ## the produced node to `father`. The kind of the current token selects
  ## the candidates; for punctuation the markup starts are tried in a fixed
  ## order (longer emphasis markers before shorter ones).

  template addSmileyAndReturn() =
    # If smilies are enabled and one is recognized here, emit it and leave
    # `parseInline` altogether.
    if roSupportSmilies in p.s.options:
      let smiley = parseSmiley(p)
      if smiley != nil:
        father.add(smiley)
        return

  template addDelimited(kind: RstNodeKind, closing: string,
                        interpretBackslash: bool) =
    # Creates a node of `kind`, fills it up to `closing`, appends it.
    var node = newRstNode(kind)
    parseUntil(p, node, closing, interpretBackslash)
    father.add(node)

  var label: PRstNode  # assigned inside the footnote-reference condition
  let startIdx = p.idx
  case currentTok(p).kind
  of tkPunct:
    if isInlineMarkupStart(p, "***"):
      addDelimited(rnTripleEmphasis, "***", true)
    elif isInlineMarkupStart(p, "**"):
      addDelimited(rnStrongEmphasis, "**", true)
    elif isInlineMarkupStart(p, "*"):
      addDelimited(rnEmphasis, "*", true)
    elif isInlineMarkupStart(p, "_`"):
      var target = newRstNode(rnInlineTarget)
      inc p.idx
      parseUntil(p, target, "`", false)
      target.anchor = rstnodeToRefname(target)
      addAnchorRst(p, name = linkName(target), target = target,
                   anchorType=manualInlineAnchor)
      father.add(target)
    elif isMarkdownCodeBlock(p):
      father.add(parseMarkdownCodeblock(p))
    elif isInlineMarkupStart(p, "``"):
      addDelimited(rnInlineLiteral, "``", false)
    elif match(p, p.idx, ":w:") and
        (var roleEnd = getRefnameIdx(p, p.idx + 1);
         p.tok[roleEnd+2].symbol == "`"):
      # explicit role prefix:  :role:`text`
      let (roleName, _) = getRefname(p, p.idx+1)
      let roleKind = whichRole(p, roleName)
      var roleNode = newRstNode(roleKind)
      p.idx = roleEnd + 2
      if roleKind == rnInlineCode:
        roleNode = roleNode.toInlineCode(language=roleName)
      parseUntil(p, roleNode, "`", false) # bug #17260
      if roleKind in {rnUnknownRole, rnCodeFragment}:
        roleNode = roleNode.toOtherRole(roleKind, roleName)
      father.add(roleNode)
    elif isInlineMarkupStart(p, "`"):
      var interpreted = newRstNode(rnInterpretedText,
                                   info=lineInfo(p, p.idx+1))
      parseUntil(p, interpreted, "`", false) # bug #17260
      interpreted = parsePostfix(p, interpreted)
      father.add(interpreted)
    elif isInlineMarkupStart(p, "|"):
      var subst = newRstNode(rnSubstitutionReferences,
                             info=lineInfo(p, p.idx+1))
      parseUntil(p, subst, "|", false)
      father.add(subst)
    elif currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and
         (label = parseFootnoteName(p, reference=true); label != nil):
      var footnoteRef = newRstNode(rnFootnoteRef)
      footnoteRef.info = lineInfo(p, startIdx+1)
      footnoteRef.add label
      let (fnType, _) = getFootnoteType(p.s, label)
      # remember where auto-symbol / auto-number references occur
      case fnType
      of fnAutoSymbol:
        p.s.lineFootnoteSymRef.add lineInfo(p)
      of fnAutoNumber:
        p.s.lineFootnoteNumRef.add lineInfo(p)
      else: discard
      father.add(footnoteRef)
    elif roSupportMarkdown in p.s.options and
        currentTok(p).symbol == "[" and nextTok(p).symbol != "[" and
        parseMarkdownLink(p, father):
      discard "parseMarkdownLink already processed it"
    else:
      addSmileyAndReturn()
      parseBackslash(p, father)
  of tkWord:
    addSmileyAndReturn()
    parseWordOrRef(p, father)
  of tkAdornment, tkOther, tkWhite:
    if isMarkdownCodeBlock(p):
      father.add(parseMarkdownCodeblock(p))
      return
    addSmileyAndReturn()
    father.add(newLeaf(p))
    inc p.idx
  else: discard
|
|
|
|
proc getDirective(p: var RstParser): string =
  ## Reads a directive name of the form ``name::`` that follows the current
  ## whitespace token. On success returns the lower-cased name and moves
  ## `p.idx` past ``::`` (and past one following whitespace token).
  ## Returns "" otherwise; a style warning is emitted when the name is
  ## followed by a single colon or by some other run of colons.
  result = ""
  if currentTok(p).kind != tkWhite:
    return
  let (name, lastIdx) = getRefname(p, p.idx + 1)
  if name.len == 0:
    return
  let colonIdx = lastIdx + 1
  if p.tok[colonIdx].symbol == "::":
    p.idx = colonIdx + 1
    if currentTok(p).kind == tkWhite:
      inc p.idx
    elif currentTok(p).kind != tkIndent:
      rstMessage(p, mwRstStyle,
                 "whitespace or newline expected after directive " & name)
    result = name.toLowerAscii()
  elif p.tok[colonIdx].symbol == ":":
    rstMessage(p, mwRstStyle,
               "double colon :: may be missing at end of '" & name & "'",
               p.tok[colonIdx].line, p.tok[colonIdx].col)
  elif p.tok[colonIdx].kind == tkPunct and p.tok[colonIdx].symbol[0] == ':':
    rstMessage(p, mwRstStyle,
               "too many colons for a directive (should be ::)",
               p.tok[colonIdx].line, p.tok[colonIdx].col)
|
|
|
|
proc parseComment(p: var RstParser, col: int): PRstNode =
  ## Skips over a comment and always returns nil.
  ## If the token after the current one is a line break the comment is
  ## empty and only one token is skipped. Otherwise tokens are skipped for
  ## as long as they are positioned deeper than `col` (indentation value
  ## for line-break tokens, column for all other tokens).
  if currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
    inc p.idx  # empty comment
  else:
    while currentTok(p).kind != tkEof:
      let depth =
        if currentTok(p).kind == tkIndent: currentTok(p).ival
        else: currentTok(p).col
      if depth <= col:
        break
      inc p.idx
  result = nil
|
|
|
|
proc parseLine(p: var RstParser, father: PRstNode) =
  ## Parses inline elements into `father` until a token that is not
  ## whitespace, word, punctuation or "other" is reached (so it also stops
  ## at adornment tokens).
  while currentTok(p).kind in {tkWhite, tkWord, tkOther, tkPunct}:
    parseInline(p, father)
|
|
|
|
proc parseUntilNewline(p: var RstParser, father: PRstNode) =
  ## Parses inline elements (adornment tokens included) into `father`
  ## until a line break or the end of input is reached.
  while currentTok(p).kind notin {tkEof, tkIndent}:
    parseInline(p, father)
|
|
|
|
# Forward declaration (no body here) so that the procs below can call
# `parseSection` before its implementation appears.
proc parseSection(p: var RstParser, result: PRstNode) {.gcsafe.}
|
|
|
|
proc tokenAfterNewline(p: RstParser, start: int): int =
  ## Scans forward from `start` and returns the index just after the first
  ## line-break token, or the index of the end-of-input token if that
  ## comes first.
  result = start
  while p.tok[result].kind notin {tkEof, tkIndent}:
    inc result
  if p.tok[result].kind == tkIndent:
    inc result
|
|
|
|
proc tokenAfterNewline(p: RstParser): int {.inline.} =
  ## `tokenAfterNewline` starting from the current token.
  tokenAfterNewline(p, p.idx)
|
|
|
|
proc getWrappableIndent(p: RstParser): int =
  ## Gets baseline indentation for bodies of field lists and directives.
  ## Handles situations like this (with possible de-indent in [case.3])::
  ##
  ##   :field:   definition                                          [case.1]
  ##
  ##   currInd   currentTok(p).col
  ##   |         |
  ##   v         v
  ##
  ##   .. Note:: defItem:                                            [case.2]
  ##                 definition
  ##
  ##                 ^
  ##                 |
  ##                 nextIndent
  ##
  ##   .. Note:: - point1                                            [case.3]
  ##       - point 2
  ##
  ##       ^
  ##       |
  ##       nextIndent
  if currentTok(p).kind == tkIndent:
    return currentTok(p).ival
  # `ival` of the token right before the start of the next line
  let nextIndent = p.tok[tokenAfterNewline(p)-1].ival
  let onlyThisLine = nextIndent <= currInd(p)                # [case.1]
  let maybeDefList = nextIndent >= currentTok(p).col         # [case.2]
  result =
    if onlyThisLine or maybeDefList: currentTok(p).col
    else: nextIndent  # allow parsing next lines [case.3]
|
|
|
|
proc getMdBlockIndent(p: RstParser): int =
  ## Markdown version of `getWrappableIndent`.
  ## At a line break the indentation of that line break is returned.
  ## Otherwise the column of the current token is used when the next line
  ## is not indented deeper than the current indentation, and the next
  ## line's indentation is used when it is.
  if currentTok(p).kind == tkIndent:
    return currentTok(p).ival
  let nextIndent = p.tok[tokenAfterNewline(p)-1].ival
  # TODO: Markdown-compliant definition should allow nextIndent == currInd(p):
  result =
    if nextIndent <= currInd(p): currentTok(p).col  # parse only this line
    else: nextIndent  # allow parsing next lines [case.3]
|
|
|
|
proc indFollows(p: RstParser): bool =
  ## True when the current token is a line break whose indentation is
  ## deeper than the current indentation level.
  if currentTok(p).kind != tkIndent:
    return false
  result = currentTok(p).ival > currInd(p)
|
|
|
|
proc parseBlockContent(p: var RstParser, father: var PRstNode,
                       contentParser: SectionParser): bool {.gcsafe.} =
  ## parse the final content part of explicit markup blocks (directives,
  ## footnotes, etc). Returns true if succeeded.
  ##
  ## Content is parsed only when it starts on the current line or on a
  ## deeper-indented following line; it is parsed by `contentParser` with
  ## the indentation from `getWrappableIndent` pushed for the duration.
  if currentTok(p).kind == tkIndent and not indFollows(p):
    return false
  pushInd(p, getWrappableIndent(p))
  let body = contentParser(p)
  popInd(p)
  father.add body
  result = true
|
|
|
|
proc parseSectionWrapper(p: var RstParser): PRstNode =
  ## Parses a section into a fresh `rnInner` node, then unwraps `rnInner`
  ## nodes that have exactly one son so the innermost such node is returned.
  var node = newRstNode(rnInner)
  parseSection(p, node)
  while node.kind == rnInner and node.len == 1:
    node = node.sons[0]
  result = node
|
|
|
|
proc parseField(p: var RstParser): PRstNode =
  ## Returns a parsed rnField node.
  ##
  ## rnField nodes have two children nodes, a rnFieldName and a rnFieldBody.
  ## The body is parsed only when its baseline indentation is deeper than
  ## the column where the field starts; otherwise it stays empty.
  result = newRstNode(rnField, info=lineInfo(p))
  let fieldCol = currentTok(p).col
  let name = newRstNode(rnFieldName)
  parseUntil(p, name, ":", false)
  let body = newRstNode(rnFieldBody)
  if currentTok(p).kind == tkWhite:
    inc p.idx
  let bodyIndent = getWrappableIndent(p)
  if bodyIndent > fieldCol:
    pushInd(p, bodyIndent)
    parseSection(p, body)
    popInd(p)
  result.add(name)
  result.add(body)
|
|
|
|
proc parseFields(p: var RstParser): PRstNode =
  ## Parses fields for a section or directive block.
  ##
  ## This proc may return nil if the parsing doesn't find anything of value,
  ## otherwise it will return a node of rnFieldList type with children.
  ##
  ## A field list is recognized either at the very first token (when it is
  ## ``:``) or after a line break followed by ``:``. Further fields are
  ## collected while they start at the same column as the first one.
  let atStart = p.idx == 0 and p.tok[0].symbol == ":"
  let afterNewline =
    currentTok(p).kind == tkIndent and nextTok(p).symbol == ":"
  if not (afterNewline or atStart):
    return nil
  let col = if atStart: currentTok(p).col else: currentTok(p).ival
  result = newRstNodeA(p, rnFieldList)
  if not atStart:
    inc p.idx  # skip the line break
  result.add(parseField(p))
  while currentTok(p).kind == tkIndent and currentTok(p).ival == col and
      nextTok(p).symbol == ":":
    inc p.idx
    result.add(parseField(p))
|
|
|
|
proc getFieldValue*(n: PRstNode): string =
  ## Returns the value of a specific ``rnField`` node.
  ##
  ## This proc will assert if the node is not of the expected type (an
  ## ``rnField`` with exactly a ``rnFieldName`` and a ``rnFieldBody`` son).
  ## The empty string will be returned as a minimum. Any value in the rst
  ## will be stripped from leading/trailing whitespace.
  assert n.kind == rnField
  assert n.len == 2
  assert n.sons[0].kind == rnFieldName
  assert n.sons[1].kind == rnFieldBody
  let body = addNodes(n.sons[1])
  result = body.strip
|
|
|
|
proc getFieldValue(n: PRstNode, fieldname: string): string =
  ## Looks up the field called `fieldname` (compared with
  ## `cmpIgnoreStyle`) in the field list stored as the second son of `n`.
  ## Returns the field body text, ``"\x01\x01"`` if the field exists but is
  ## empty, and "" if there is no field list or no such field.
  let fields = n.sons[1]
  if fields == nil:
    return
  if fields.kind != rnFieldList:
    #InternalError("getFieldValue (2): " & $n.sons[1].kind)
    # We don't like internal errors here anymore as that would break the forum!
    return
  for i in 0 ..< fields.len:
    let field = fields.sons[i]
    if cmpIgnoreStyle(addNodes(field.sons[0]), fieldname) != 0:
      continue
    result = addNodes(field.sons[1])
    if result == "":
      result = "\x01\x01" # indicates that the field exists
    return
|
|
|
|
proc getArgument(n: PRstNode): string =
  ## Returns the text of the first son of `n`, or "" if that son is nil.
  result =
    if n.sons[0] == nil: ""
    else: addNodes(n.sons[0])
|
|
|
|
# Forward declaration (no body here); the implementation is not in this
# part of the file.
proc parseDotDot(p: var RstParser): PRstNode {.gcsafe.}
|
|
proc parseLiteralBlock(p: var RstParser): PRstNode =
  ## Parses an indented literal block into an `rnLiteralBlock` node with a
  ## single text leaf.
  ##
  ## Starting at a line break, the block runs until the end of input or a
  ## line indented less than that first line break; line breaks inside the
  ## block become "\n" plus the indentation relative to the block.
  ## When not starting at a line break, only the rest of the current line
  ## is taken.
  result = newRstNodeA(p, rnLiteralBlock)
  var text = ""
  if currentTok(p).kind != tkIndent:
    while currentTok(p).kind notin {tkIndent, tkEof}:
      text.add(currentTok(p).symbol)
      inc p.idx
  else:
    let baseIndent = currentTok(p).ival
    while currentTok(p).kind == tkIndent: inc p.idx  # skip blank lines
    while currentTok(p).kind != tkEof:
      if currentTok(p).kind == tkIndent:
        if currentTok(p).ival < baseIndent:
          break
        text.add("\n")
        text.add(spaces(currentTok(p).ival - baseIndent))
      else:
        text.add(currentTok(p).symbol)
      inc p.idx
  result.add(newLeaf(text))
|
|
|
|
proc parseQuotedLiteralBlock(p: var RstParser): PRstNode =
  ## Parses a quoted literal block into an `rnLiteralBlock` node with a
  ## single text leaf. Every line of the block is at the current
  ## indentation and starts with the same quoting character as the first
  ## one. The block ends at the end of input, at a less indented line or at
  ## a blank line; other interruptions produce a style warning.
  result = newRstNodeA(p, rnLiteralBlock)
  var text = ""
  if currentTok(p).kind == tkIndent:
    let baseIndent = currInd(p)
    while currentTok(p).kind == tkIndent: inc p.idx  # skip blank lines
    let quoteChar = currentTok(p).symbol[0]
    while currentTok(p).kind != tkEof:
      if currentTok(p).kind != tkIndent:
        text.add(currentTok(p).symbol)
        inc p.idx
        continue
      # at a line break: decide whether the block continues
      let lineIndent = currentTok(p).ival
      if lineIndent < baseIndent:
        break
      if lineIndent > baseIndent:
        rstMessage(p, mwRstStyle,
                   "unexpected indentation in quoted literal block")
        break
      if nextTok(p).kind == tkPunct and nextTok(p).symbol[0] == quoteChar:
        text.add("\n")
        inc p.idx
      elif nextTok(p).kind == tkIndent:
        break
      else:
        rstMessage(p, mwRstStyle, "no newline after quoted literal block")
        break
  result.add(newLeaf(text))
|
|
|
|
proc parseRstLiteralBlock(p: var RstParser, kind: LiteralBlockKind): PRstNode =
  ## Parses an indented literal block when `kind` is
  ## `lbIndentedLiteralBlock` and a quoted literal block otherwise.
  case kind
  of lbIndentedLiteralBlock:
    result = parseLiteralBlock(p)
  else:
    result = parseQuotedLiteralBlock(p)
|
|
|
|
proc getLevel(p: var RstParser, c: char, hasOverline: bool): int =
  ## Returns (preliminary) heading level corresponding to `c` and
  ## `hasOverline`. If level does not exist, add it first.
  ##
  ## An existing level gets its line updated to the current line and is
  ## flagged as having peers; a newly added one starts without peers.
  result = -1
  for i in 0 ..< p.s.hLevels.len:
    if p.s.hLevels[i].symbol == c and
        p.s.hLevels[i].hasOverline == hasOverline:
      result = i
      break
  if result >= 0:
    p.s.hLevels[result].line = curLine(p)
    p.s.hLevels[result].hasPeers = true
  else:
    p.s.hLevels.add LevelInfo(symbol: c, hasOverline: hasOverline,
                              line: curLine(p), hasPeers: false)
    result = p.s.hLevels.high
|
|
|
|
proc countTitles(s: PRstSharedState, n: PRstNode) =
  ## Fill `s.hTitleCnt`
  ##
  ## Walks the leading sons of `n`: nil sons, substitution definitions and
  ## default-role nodes are passed over; each overline heading whose level
  ## has no peers increments the counter. Counting stops at the first other
  ## node, at an overline level with peers, or once the counter reaches 2.
  if n == nil:
    return
  for node in n.sons:
    if node == nil:
      continue
    case node.kind
    of rnOverline:
      if s.hLevels[s.hTitleCnt].hasPeers:
        break
      inc s.hTitleCnt
      if s.hTitleCnt >= 2:
        break
    of rnSubstitutionDef, rnDefaultRole:
      discard
    else:
      break
|
|
|
|
proc isAdornmentHeadline(p: RstParser, adornmentIdx: int): bool =
  ## check that underline/overline length is enough for the heading.
  ## No support for Unicode.
  ##
  ## `adornmentIdx` is the token index of the adornment line. If it lies
  ## after the current token it is treated as an underline of the current
  ## line; otherwise the current token is an overline and a matching
  ## underline is required after the heading text. A too short or
  ## mismatching adornment is reported with `meNewSectionExpected`.
  let adornment = p.tok[adornmentIdx].symbol
  if adornment in ["::", "..", "|"] or isMarkdownCodeBlock(p, adornmentIdx):
    return false
  var textLen = 0
  var reason = ""
  if p.idx < adornmentIdx:  # check for underline
    if p.idx > 0:
      textLen = currentTok(p).col - p.tok[adornmentIdx].col
    if textLen > 0:
      rstMessage(p, mwRstStyle, "indentation of heading text allowed" &
                                " only for overline titles")
    for i in p.idx ..< adornmentIdx-1:  # adornmentIdx-1 is a linebreak
      textLen += p.tok[i].symbol.len
    result = adornment.len >= textLen and textLen != 0
    if not result:
      reason = "(underline '" & adornment & "' is too short)"
  else:  # p.idx == adornmentIdx, at overline. Check overline and underline
    var i = p.idx + 2
    textLen = p.tok[i].col - p.tok[adornmentIdx].col
    while p.tok[i].kind notin {tkEof, tkIndent}:
      textLen += p.tok[i].symbol.len
      inc i
    let hasUnderline =
      p.tok[i].kind == tkIndent and
      p.tok[i+1].kind == tkAdornment and
      p.tok[i+1].symbol[0] == adornment[0]
    if not hasUnderline:
      # it's not overline/underline section, not reporting error
      return false
    if adornment.len < textLen or textLen == 0:
      result = false
      reason = "(overline '" & adornment & "' is too short)"
    elif p.tok[i+1].symbol != adornment:
      result = false
      reason = "(underline '" & p.tok[i+1].symbol & "' does not match " &
               "overline '" & adornment & "')"
    else:
      result = true
  if not result:
    rstMessage(p, meNewSectionExpected, reason)
|
|
|
|
proc isLineBlock(p: RstParser): bool =
  ## Decides from the first token of the next line whether the current
  ## ``|`` starts a line block: that token is ``|`` in the same column, or
  ## it is positioned to the right of the current token, or its symbol is
  ## a bare "\n".
  let j = tokenAfterNewline(p)
  let hereCol = currentTok(p).col
  let nextCol = p.tok[j].col
  if hereCol == nextCol and p.tok[j].symbol == "|":
    return true
  result = nextCol > hereCol or p.tok[j].symbol == "\n"
|
|
|
|
proc isMarkdownBlockQuote(p: RstParser): bool =
  ## True when the current token's symbol begins with ``>``.
  let firstChar = currentTok(p).symbol[0]
  result = firstChar == '>'
|
|
|
|
proc whichRstLiteralBlock(p: RstParser): LiteralBlockKind =
  ## Checks that the following tokens are either Indented Literal Block or
  ## Quoted Literal Block (which is not quite the same as Markdown quote block).
  ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#quoted-literal-blocks
  ##
  ## Requires ``::`` immediately followed by a line break. A deeper
  ## indented continuation gives `lbIndentedLiteralBlock`; a continuation
  ## at the same indentation that starts with a quoting character gives
  ## `lbQuotedLiteralBlock`; everything else gives `lbNone`.
  const quotingCharacters = {
    '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-',
    '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^',
    '_', '`', '{', '|', '}', '~'}
  if currentTok(p).symbol != "::" or nextTok(p).kind != tkIndent:
    return lbNone
  let bodyIndent = nextTok(p).ival
  if currInd(p) < bodyIndent:
    return lbIndentedLiteralBlock
  if currInd(p) > bodyIndent:
    return lbNone
  # same indentation: look at the first token after the blank lines
  var i = p.idx + 1
  while p.tok[i].kind == tkIndent: inc i
  result =
    if p.tok[i].kind in {tkPunct, tkAdornment} and
        p.tok[i].symbol[0] in quotingCharacters:
      lbQuotedLiteralBlock
    else:
      lbNone
|
|
|
|
proc predNL(p: RstParser): bool =
  ## True at the very first token, or when the previous token is a line
  ## break whose indentation equals the current indentation level.
  if p.idx <= 0:
    return true
  result = prevTok(p).kind == tkIndent and prevTok(p).ival == currInd(p)
|
|
|
|
proc isDefList(p: RstParser): bool =
  ## True when the next line starts to the right of the current token
  ## with a word, "other" or punctuation token, and the token two
  ## positions before that line start is not ``::``.
  let j = tokenAfterNewline(p)
  if currentTok(p).col >= p.tok[j].col:
    return false
  if p.tok[j].kind notin {tkWord, tkOther, tkPunct}:
    return false
  result = p.tok[j - 2].symbol != "::"
|
|
|
|
proc `$`(t: Token): string = # for debugging only
  ## Renders a token as ``(kind line=.. col=.. ival=..)`` for line-break
  ## tokens and ``(kind line=.. col=.. symbol=..)`` for all others.
  result = "(" & $t.kind & " line=" & $t.line & " col=" & $t.col
  if t.kind == tkIndent:
    result.add " ival=" & $t.ival
  else:
    result.add " symbol=" & t.symbol
  result.add ")"
|
|
|
|
proc skipNewlines(p: RstParser, j: int): int =
  ## Returns the index of the first token at or after `j` that is not a
  ## line break.
  result = j
  while p.tok[result].kind == tkIndent:
    inc result  # skip blank lines
|
|
|
|
proc skipNewlines(p: var RstParser) =
  ## Advances the parser past any line-break tokens at the current position.
  let next = skipNewlines(p, p.idx)
  p.idx = next
|
|
|
|
const maxMdRelInd = 3  ## In Markdown: maximum indentation that does not yet
                       ## make the indented block a code

proc isMdRelInd(outerInd, nestedInd: int): bool =
  ## True when `nestedInd` is at least `outerInd` and exceeds it by no
  ## more than `maxMdRelInd`.
  nestedInd in outerInd .. outerInd + maxMdRelInd
|
|
|
|
proc isMdDefBody(p: RstParser, j: int, termCol: int): bool =
  ## True when token `j` is a ``:`` placed within the allowed relative
  ## indentation of `termCol`, followed by whitespace and then by a word,
  ## "other" or punctuation token.
  if p.tok[j].symbol != ":":
    return false
  if not isMdRelInd(termCol, p.tok[j].col):
    return false
  if p.tok[j+1].kind != tkWhite:
    return false
  result = p.tok[j+2].kind in {tkWord, tkOther, tkPunct}
|
|
|
|
proc isMdDefListItem(p: RstParser, idx: int): bool =
  ## Checks whether the line starting at `idx` is the term of a Markdown
  ## definition list item: after the end of that line (and any blank
  ## lines) there must be a definition body as recognized by
  ## `isMdDefBody`, within the allowed relative indentation of the current
  ## indentation level.
  let bodyIdx = skipNewlines(p, tokenAfterNewline(p, idx))
  let termCol = p.tok[bodyIdx].col
  if not isMdRelInd(currInd(p), termCol):
    return false
  result = isMdDefBody(p, bodyIdx, termCol)
|
|
|
|
proc isOptionList(p: RstParser): bool =
  ## True when the current position matches one of the option-list
  ## patterns ``-w``, ``--w``, ``/w`` or ``//w``.
  for pattern in ["-w", "--w", "/w", "//w"]:
    if match(p, p.idx, pattern):
      return true
  result = false
|
|
|
|
proc isMarkdownHeadlinePattern(s: string): bool =
  ## True when `s` consists of one to six ``#`` characters and nothing else.
  if s.len notin 1 .. 6:
    return false
  for ch in s:
    if ch != '#':
      return false
  result = true
|
|
|
|
proc isMarkdownHeadline(p: RstParser): bool =
  ## True when Markdown support is on and the current tokens are a
  ## ``#``..``######`` marker, whitespace, and then a word, "other" or
  ## punctuation token.
  result =
    roSupportMarkdown in p.s.options and
    isMarkdownHeadlinePattern(currentTok(p).symbol) and
    nextTok(p).kind == tkWhite and
    p.tok[p.idx+2].kind in {tkWord, tkOther, tkPunct}
|
|
|
|
proc findPipe(p: RstParser, start: int): bool =
  ## True when a ``|`` token occurs at or after `start` before the next
  ## line break or the end of input.
  var i = start
  while p.tok[i].symbol != "|":
    if p.tok[i].kind in {tkIndent, tkEof}:
      return false
    inc i
  result = true
|
|
|
|
proc whichSection(p: RstParser): RstNodeKind =
  ## Determines which kind of block element starts at the current token.
  ## The checks are tried in a fixed order and the first one that applies
  ## wins; `rnParagraph` is the fallback for text tokens and `rnLeaf` is
  ## returned for token kinds that cannot start a block.
  let startKind = currentTok(p).kind
  if startKind in {tkAdornment, tkPunct}:
    # for punctuation sequences that can be both tkAdornment and tkPunct
    if isMarkdownCodeBlock(p):
      return rnCodeBlock
    if isRst(p) and currentTok(p).symbol == "::":
      return rnLiteralBlock
    if currentTok(p).symbol == ".." and
        nextTok(p).kind in {tkWhite, tkIndent}:
      return rnDirective
  case startKind
  of tkAdornment:
    if match(p, p.idx + 1, "iI") and currentTok(p).symbol.len >= 4:
      return rnTransition
    if match(p, p.idx, "+a+"):
      result = rnGridTable
      rstMessage(p, meGridTableNotImplemented)
      return
    if match(p, p.idx + 1, " a"):
      return rnTable
    if currentTok(p).symbol == "|" and isLineBlock(p):
      return rnLineBlock
    if roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
      return rnMarkdownBlockQuote
    if (match(p, p.idx + 1, "i") and not match(p, p.idx + 2, "I")) and
        isAdornmentHeadline(p, p.idx):
      return rnOverline
    result = rnParagraph
  of tkPunct:
    if isMarkdownHeadline(p):
      return rnMarkdownHeadline
    if roSupportMarkdown in p.s.options and predNL(p) and
        match(p, p.idx, "| w") and findPipe(p, p.idx+3):
      return rnMarkdownTable
    if isMd(p) and isMdFootnoteName(p, reference=false):
      return rnFootnote
    if currentTok(p).symbol == "|" and isLineBlock(p):
      return rnLineBlock
    if roSupportMarkdown in p.s.options and isMarkdownBlockQuote(p):
      return rnMarkdownBlockQuote
    if match(p, tokenAfterNewline(p), "aI") and
        isAdornmentHeadline(p, tokenAfterNewline(p)):
      return rnHeadline
    if currentTok(p).symbol in ["+", "*", "-"] and nextTok(p).kind == tkWhite:
      return rnBulletList
    if match(p, p.idx, ":w:E"):
      # (currentTok(p).symbol == ":")
      return rnFieldList
    if match(p, p.idx, "(e) ") or match(p, p.idx, "e) ") or
        match(p, p.idx, "e. "):
      return rnEnumList
    if isOptionList(p):
      return rnOptionList
    if isRst(p) and isDefList(p):
      return rnDefList
    if isMd(p) and isMdDefListItem(p, p.idx):
      return rnMdDefList
    result = rnParagraph
  of tkWord, tkOther, tkWhite:
    let nextLineIdx = tokenAfterNewline(p)
    if match(p, nextLineIdx, "aI"):
      # the next line is an adornment: either a valid underline or plain text
      if isAdornmentHeadline(p, nextLineIdx):
        return rnHeadline
      return rnParagraph
    if match(p, p.idx, "e) ") or match(p, p.idx, "e. "):
      return rnEnumList
    if isRst(p) and isDefList(p):
      return rnDefList
    if isMd(p) and isMdDefListItem(p, p.idx):
      return rnMdDefList
    result = rnParagraph
  else:
    result = rnLeaf
|
|
|
|
proc parseLineBlock(p: var RstParser): PRstNode =
  ## Returns rnLineBlock with all sons of type rnLineBlockItem
  ##
  ## Returns nil when the token after the current ``|`` is neither
  ## whitespace nor a line break. Items are collected while each next line
  ## starts with ``|`` in the same column as the first one.
  if nextTok(p).kind notin {tkWhite, tkIndent}:
    return nil
  let col = currentTok(p).col
  result = newRstNodeA(p, rnLineBlock)
  var another = true
  while another:
    let item = newRstNode(rnLineBlockItem)
    if nextTok(p).kind == tkWhite:
      if nextTok(p).symbol.len > 1:  # pass additional indentation after '| '
        item.lineIndent = nextTok(p).symbol
      inc p.idx, 2
      pushInd(p, p.tok[p.idx].col)
      parseSection(p, item)
      popInd(p)
    else:  # tkIndent => add an empty line
      item.lineIndent = "\n"
      inc p.idx, 1
    result.add(item)
    another =
      currentTok(p).kind == tkIndent and currentTok(p).ival == col and
      nextTok(p).symbol == "|" and
      p.tok[p.idx + 2].kind in {tkWhite, tkIndent}
    if another:
      inc p.idx, 1  # step over the line break onto the next `|`
|
|
|
|
# Forward declaration (no body here) so that the Markdown block quote
# parsing below can call `parseDoc` before its implementation appears.
proc parseDoc(p: var RstParser): PRstNode {.gcsafe.}
|
|
|
|
proc getQuoteSymbol(p: RstParser, idx: int): tuple[sym: string, depth: int, tokens: int] =
  ## Collects the quote marker that starts at token `idx`, including any
  ## further ``>`` punctuation tokens that are separated from it by
  ## whitespace tokens.
  ##
  ## * `sym` - concatenation of the collected token symbols (whitespace
  ##   included);
  ## * `depth` - summed length of the collected non-whitespace symbols;
  ## * `tokens` - number of tokens collected.
  let firstSym = p.tok[idx].symbol
  result = (sym: firstSym, depth: firstSym.len, tokens: 1)
  var i = idx + 1
  while p.tok[i].kind == tkWhite and i+1 < p.tok.len and
      p.tok[i+1].kind == tkPunct and p.tok[i+1].symbol[0] == '>':
    result.sym.add p.tok[i].symbol
    result.sym.add p.tok[i+1].symbol
    result.depth += p.tok[i+1].symbol.len
    result.tokens += 2
    i += 2
|
|
|
|
proc parseMarkdownQuoteSegment(p: var RstParser, curSym: string, col: int):
                              PRstNode =
  ## We define *segment* as a group of lines that starts with exactly the
  ## same quote symbol. If the following lines don't contain any `>` (*lazy*
  ## continuation) they considered as continuation of the current segment.
  ##
  ## The tokens of the segment are moved from `p` into a separate parser
  ## with the quote symbols removed, and that parser's result of `parseDoc`
  ## is returned. `curSym` is the quote symbol of the segment and `col` is
  ## the column at which the block quote started.
  var q: RstParser  # to delete `>` at a start of line and then parse normally
  initParser(q, p.s)
  q.col = p.col
  q.line = p.line
  var minCol = int.high  # minimum colum num in the segment
  while true:  # move tokens of segment from `p` to `q` skipping `curSym`
    case currentTok(p).kind
    of tkEof:
      break
    of tkIndent:
      if nextTok(p).kind in {tkIndent, tkEof}:
        break  # blank line or end of input ends the segment
      else:
        if nextTok(p).symbol[0] == '>':
          var (quoteSym, _, quoteTokens) = getQuoteSymbol(p, p.idx + 1)
          if quoteSym == curSym:  # the segment continues
            var iTok = tokenAfterNewline(p, p.idx+1)
            if p.tok[iTok].kind notin {tkEof, tkIndent} and
                p.tok[iTok].symbol[0] != '>':
              rstMessage(p, mwRstStyle,
                  "two or more quoted lines are followed by unquoted line " &
                  $(curLine(p) + 1))
              break
            q.tok.add currentTok(p)
            var ival = currentTok(p).ival + quoteSym.len
            inc p.idx, (1 + quoteTokens)  # skip newline and > > >
            if currentTok(p).kind == tkWhite:
              ival += currentTok(p).symbol.len
              inc p.idx
            # fix up previous `tkIndent`s to ival (as if >>> were not there)
            var j = q.tok.len - 1
            while j >= 0 and q.tok[j].kind == tkIndent:
              q.tok[j].ival = ival
              dec j
          else:  # next segment started
            break
        elif currentTok(p).ival < col:
          break
        else:  # the segment continues, a case like:
               # > beginning
               # continuation
          q.tok.add currentTok(p)
          inc p.idx
    else:
      if currentTok(p).col < minCol: minCol = currentTok(p).col
      q.tok.add currentTok(p)
      inc p.idx
  q.indentStack = @[minCol]
  # if initial indentation `minCol` is > 0 then final newlines
  # should be omitted so that parseDoc could advance to the end of tokens:
  var j = q.tok.len - 1
  # `j >= 0` guards the case of a segment with no tokens at all or with
  # line breaks only (e.g. a bare `>` line): without it the loop would
  # index `q.tok[-1]`.
  while j >= 0 and q.tok[j].kind == tkIndent: dec j
  q.tok.setLen (j+1)
  q.tok.add Token(kind: tkEof, line: currentTok(p).line)
  result = parseDoc(q)
|
|
|
|
proc parseMarkdownBlockQuote(p: var RstParser): PRstNode =
  ## Parses a Markdown block quote starting at the current ``>`` token.
  ## The result is an `rnMarkdownBlockQuote` node with one
  ## `rnMarkdownBlockQuoteItem` per segment; each item records the
  ## quotation depth of its quote symbol.
  var (segmentSym, depth, symTokens) = getQuoteSymbol(p, p.idx)
  let startCol = currentTok(p).col
  result = newRstNodeA(p, rnMarkdownBlockQuote)
  inc p.idx, symTokens  # skip first >
  while true:
    let item = newRstNode(rnMarkdownBlockQuoteItem)
    item.quotationDepth = depth
    if currentTok(p).kind == tkWhite:
      inc p.idx
    item.add parseMarkdownQuoteSegment(p, segmentSym, startCol)
    result.add(item)
    let nextSegmentFollows =
      currentTok(p).kind == tkIndent and currentTok(p).ival == startCol and
      nextTok(p).kind != tkEof and nextTok(p).symbol[0] == '>'
    if not nextSegmentFollows:
      break
    (segmentSym, depth, symTokens) = getQuoteSymbol(p, p.idx + 1)
    inc p.idx, (1 + symTokens)  # skip newline and > > >
|
|
|
|
proc parseParagraph(p: var RstParser, result: PRstNode) =
  ## Parses a paragraph into `result`.
  ## A blank line ends the paragraph. A line break at the current
  ## indentation (or, in Markdown mode, at a deeper one) continues it when
  ## the next line is plain text, a line block or a Markdown block quote;
  ## any other kind of block ends it. In RST mode a ``::`` that introduces
  ## a literal block is turned into ``:`` plus the literal block, which
  ## also ends the paragraph.
  while true:
    case currentTok(p).kind
    of tkIndent:
      if nextTok(p).kind == tkIndent:
        inc p.idx
        break  # blank line breaks paragraph for both Md & Rst
      let sameIndent = currentTok(p).ival == currInd(p)
      # (Md allows adding additional indentation inside paragraphs)
      let mdDeeperIndent = isMd(p) and currentTok(p).ival > currInd(p)
      if not (sameIndent or mdDeeperIndent):
        break
      inc p.idx
      case whichSection(p)
      of rnParagraph, rnLeaf, rnHeadline, rnMarkdownHeadline,
          rnOverline, rnDirective:
        result.add newLeaf(" ")
      of rnLineBlock:
        result.addIfNotNil(parseLineBlock(p))
      of rnMarkdownBlockQuote:
        result.addIfNotNil(parseMarkdownBlockQuote(p))
      else:
        dec p.idx  # allow subsequent block to be parsed as another section
        break
    of tkPunct:
      let literalKind =
        if isRst(p): whichRstLiteralBlock(p)
        else: lbNone
      if literalKind == lbNone:
        parseInline(p, result)
      else:
        result.add newLeaf(":")
        inc p.idx  # skip '::'
        result.add(parseRstLiteralBlock(p, literalKind))
        break
    of tkWhite, tkWord, tkAdornment, tkOther:
      parseInline(p, result)
    else:
      break
|
|
|
|
proc checkHeadingHierarchy(p: RstParser, lvl: int) =
  ## Reports `meNewSectionExpected` when heading level `lvl` is more than
  ## one level deeper than the current level, listing the intermediate
  ## levels that were skipped.
  if lvl - p.s.hCurLevel <= 1:
    return
  # broken hierarchy!
  proc describe(level: int): string =
    let style =
      if p.s.hLevels[level].hasOverline: "overline "
      else: "underline "
    result = style & repeat(p.s.hLevels[level].symbol, 5)
  var msg = "(section level inconsistent: "
  msg.add describe(lvl)
  msg.add " unexpectedly found, " &
    "while the following intermediate section level(s) are missing on lines "
  msg.add $p.s.hLevels[p.s.hCurLevel].line
  msg.add ".."
  msg.add $curLine(p)
  msg.add ":"
  let lastMissing = lvl - 1
  for level in p.s.hCurLevel+1 .. lastMissing:
    msg.add " "
    msg.add describe(level)
    if level != lastMissing:
      msg.add ","
  msg.add ")"
  rstMessage(p, meNewSectionExpected, msg)
|
|
|
|
proc parseHeadline(p: var RstParser): PRstNode =
  ## Parses a heading at the current position: either a Markdown ``#``
  ## heading (level = number of ``#``) or an RST heading followed by an
  ## underline (level looked up by the underline character). The heading
  ## is registered as an anchor and appended to the table-of-contents part.
  if isMarkdownHeadline(p):
    result = newRstNode(rnMarkdownHeadline)
    # Note that level hierarchy is not checked for markdown headings
    result.level = currentTok(p).symbol.len
    assert(nextTok(p).kind == tkWhite)
    inc p.idx, 2  # skip the marker and the whitespace after it
    parseUntilNewline(p, result)
  else:
    result = newRstNode(rnHeadline)
    parseUntilNewline(p, result)
    assert(currentTok(p).kind == tkIndent)
    assert(nextTok(p).kind == tkAdornment)
    let underlineChar = nextTok(p).symbol[0]
    inc p.idx, 2  # skip the line break and the underline
    let level = getLevel(p, underlineChar, hasOverline=false)
    result.level = level
    checkHeadingHierarchy(p, level)
    p.s.hCurLevel = level
  addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor)
  p.s.tocPart.add result
|
|
|
|
proc parseOverline(p: var RstParser): PRstNode =
  ## Parses a heading that starts with an overline at the current token.
  ## Heading text may span several lines as long as the continuation lines
  ## are indented deeper than the current indentation. A trailing
  ## adornment line is consumed, and the heading is registered as an anchor
  ## and appended to the table-of-contents part.
  let overlineChar = currentTok(p).symbol[0]
  inc p.idx, 2  # skip the overline and the line break after it
  result = newRstNode(rnOverline)
  while true:
    parseUntilNewline(p, result)
    if currentTok(p).kind != tkIndent:
      break
    inc p.idx
    if prevTok(p).ival <= currInd(p):
      break
    result.add newLeaf(" ")  # join the continuation line with a space
  let level = getLevel(p, overlineChar, hasOverline=true)
  result.level = level
  checkHeadingHierarchy(p, level)
  p.s.hCurLevel = level
  if currentTok(p).kind == tkAdornment:
    inc p.idx
    if currentTok(p).kind == tkIndent:
      inc p.idx
  addAnchorRst(p, linkName(result), result, anchorType=headlineAnchor)
  p.s.tocPart.add result
|
|
|
|
proc fixHeadlines(s: PRstSharedState) =
  ## Finalizes all headings collected in `s.tocPart` and then empties it:
  ## section levels are shifted according to `s.hTitleCnt`, and every
  ## heading of level >= 1 gets its anchor (prefixed by the nearest
  ## preceding higher-level heading when a table of contents is present).

  # Fix up section levels depending on presence of a title and subtitle:
  for heading in s.tocPart:
    if heading.kind notin {rnHeadline, rnOverline}:
      continue
    case s.hTitleCnt
    of 2:
      if heading.level == 1:    # it's the subtitle
        heading.level = 0
      elif heading.level >= 2:  # normal sections, start numbering from 1
        heading.level -= 1
    of 0:
      heading.level += 1
    else:
      discard
  # Set headline anchors:
  for iHeading in 0 .. s.tocPart.high:
    let heading: PRstNode = s.tocPart[iHeading]
    if heading.level < 1:
      continue
    heading.anchor = rstnodeToRefname(heading)
    # Fix anchors for uniqueness if `.. contents::` is present
    if not s.hasToc:
      continue
    # Find the last higher level section for unique reference name
    var sectionPrefix = ""
    for i in countdown(iHeading - 1, 0):
      let candidate = s.tocPart[i]
      if candidate.level >= 1 and candidate.level < heading.level:
        sectionPrefix = rstnodeToRefname(candidate) & "-"
        break
    if sectionPrefix != "":
      heading.anchor = sectionPrefix & heading.anchor
  s.tocPart.setLen 0
|
|
|
|
type
  ColSpec = object
    ## Extent of one column of an RST simple table, expressed as the
    ## columns of the first and the last character of its adornment.
    start: int
    stop: int
  RstCols = seq[ColSpec]
  ColumnLimits = tuple[first, last: int]
    ## Token index range of one Markdown table cell; `last` is exclusive.
  ColSeq = seq[ColumnLimits]
|
|
|
|
proc tokStart(p: RstParser, idx: int): int =
  ## Column at which the token with index `idx` begins.
  p.tok[idx].col
|
|
|
|
proc tokStart(p: RstParser): int =
  ## Column at which the current token (`p.idx`) begins.
  tokStart(p, p.idx)
|
|
|
|
proc tokEnd(p: RstParser, idx: int): int =
  ## Column of the last character of the token with index `idx`.
  let t = p.tok[idx]
  t.col + t.symbol.len - 1
|
|
|
|
proc tokEnd(p: RstParser): int =
  ## Column of the last character of the current token (`p.idx`).
  tokEnd(p, p.idx)
|
|
|
|
proc getColumns(p: RstParser, cols: var RstCols, startIdx: int): int =
  ## Fills table column specification (or separator) `cols` from the
  ## adornment tokens beginning at `startIdx` and returns the next parser
  ## index after it. Any previous content of `cols` is discarded.
  cols.setLen 0
  result = startIdx
  while true:
    cols.add ColSpec(start: tokStart(p, result), stop: tokEnd(p, result))
    assert(p.tok[result].kind == tkAdornment)
    inc result
    # columns are separated by exactly one whitespace token
    if p.tok[result].kind != tkWhite: break
    inc result
    if p.tok[result].kind != tkAdornment: break
  if p.tok[result].kind == tkIndent: inc result
|
|
|
|
proc checkColumns(p: RstParser, cols: RstCols) =
  ## Verifies that the adornment line starting at the current token agrees
  ## with the column specification `cols`; every deviation is reported
  ## through `stopOrWarn` as `meIllformedTable`.
  var tokIdx = p.idx
  if p.tok[tokIdx].symbol[0] != '=':
    stopOrWarn(p, meIllformedTable,
               "only tables with `=` columns specification are allowed")
  let lastCol = cols.len - 1
  for colNo, spec in cols:
    let actualEnd = tokEnd(p, tokIdx)
    if actualEnd != spec.stop:
      stopOrWarn(p, meIllformedTable,
                 "end of table column #$1 should end at position $2" % [
                     $(colNo+1), $(spec.stop+ColRstOffset)],
                 p.tok[tokIdx].line, actualEnd)
    inc tokIdx
    if colNo == lastCol:
      # after the last column only optional whitespace may follow
      if p.tok[tokIdx].kind == tkWhite:
        inc tokIdx
      if p.tok[tokIdx].kind notin {tkIndent, tkEof}:
        stopOrWarn(p, meIllformedTable, "extraneous column specification")
    elif p.tok[tokIdx].kind == tkWhite:
      inc tokIdx
    else:
      stopOrWarn(p, meIllformedTable,
                 "no enough table columns",
                 p.tok[tokIdx].line, p.tok[tokIdx].col)
|
|
|
|
proc getSpans(p: RstParser, nextLine: int,
              cols: RstCols, unitedCols: RstCols): seq[int] =
  ## Calculates how many columns a joined cell occupies.
  ##
  ## `cols` is the main column specification of the table, `unitedCols`
  ## the specification read from a spanning underline (may be empty) and
  ## `nextLine` the token index of that underline, used for error location.
  ##
  ## Returns an empty sequence when `unitedCols` is empty; otherwise one
  ## entry per element of `unitedCols`, holding the number of main columns
  ## it covers. Reports `meIllformedTable` if the underline does not line up
  ## with the main columns, including the case when the underline ends
  ## before the last main column.
  if unitedCols.len == 0: return
  result = newSeq[int](unitedCols.len)
  var
    iCell = 0  # first main column of the span being measured
    jCell = 0  # main column that is examined now
    uCell = 0  # spanning column that is being measured
  while jCell < cols.len:
    # `uCell` runs past the end when the underline covers fewer columns
    # than the table has; that is treated as a mismatch below instead of
    # indexing `unitedCols` out of bounds.
    if uCell < unitedCols.len and
        cols[jCell].stop < unitedCols[uCell].stop:
      inc jCell
    elif uCell < unitedCols.len and
        cols[jCell].stop == unitedCols[uCell].stop:
      result[uCell] = jCell - iCell + 1
      iCell = jCell + 1
      jCell = jCell + 1
      inc uCell
    else:
      rstMessage(p, meIllformedTable,
                 "spanning underline does not match main table columns",
                 p.tok[nextLine].line, p.tok[nextLine].col)
      # nothing advances in this branch, so leave the loop explicitly in
      # case the message handler returns instead of raising
      break
|
|
|
|
proc parseSimpleTableRow(p: var RstParser, cols: RstCols, colChar: char): PRstNode =
  ## Parses 1 row in RST simple table and returns it as `rnTableRow` with
  ## one `rnTableDataCell` per (possibly spanning) cell.
  # Consider that columns may be spanning (united by using underline like ----):
  let nextLine = tokenAfterNewline(p)
  var unitedCols: RstCols
  var afterSpan = nextLine
  if p.tok[nextLine].kind == tkAdornment and p.tok[nextLine].symbol[0] == '-':
    afterSpan = getColumns(p, unitedCols, nextLine)
    if unitedCols == cols and p.tok[nextLine].symbol[0] == colChar:
      # legacy rst.nim compat.: allow punctuation like `----` in main boundaries
      afterSpan = nextLine
      unitedCols.setLen 0

  template colEnd(i): int =
    if i == cols.len - 1: high(int)  # last column has no limit
    elif unitedCols.len > 0: unitedCols[i].stop
    else: cols[i].stop
  template colStart(i): int =
    if unitedCols.len > 0: unitedCols[i].start
    else: cols[i].start

  let cellCount = if unitedCols.len > 0: unitedCols.len else: cols.len
  var row = newSeq[string](cellCount)
  let spans = getSpans(p, nextLine, cols, unitedCols)

  let firstLine = currentTok(p).line
  # Iterate over the lines a single cell may span:
  while true:
    var nCell = 0
    # distribute tokens between cells in the current line:
    while currentTok(p).kind notin {tkIndent, tkEof}:
      if tokEnd(p) <= colEnd(nCell):
        if tokStart(p) >= colStart(nCell):
          row[nCell].add(currentTok(p).symbol)
        elif currentTok(p).kind != tkWhite:
          stopOrWarn(p, meIllformedTable,
                     "this word crosses table column from the left")
          row[nCell].add(currentTok(p).symbol)
        inc p.idx
      else:
        if tokStart(p) < colEnd(nCell) and currentTok(p).kind != tkWhite:
          stopOrWarn(p, meIllformedTable,
                     "this word crosses table column from the right")
          row[nCell].add(currentTok(p).symbol)
          inc p.idx
        inc nCell
    if currentTok(p).kind == tkIndent: inc p.idx
    if tokEnd(p) <= colEnd(0): break
    # Continued current cells because the 1st column is empty.
    if currentTok(p).kind in {tkEof, tkAdornment}:
      break
    for k in 1 .. high(row): row[k].add('\n')

  result = newRstNode(rnTableRow)
  var q: RstParser
  for uCell in 0 ..< row.len:
    # every cell is parsed as a separate document by a fresh sub-parser
    initParser(q, p.s)
    q.col = colStart(uCell)
    q.line = firstLine - 1
    getTokens(row[uCell], q.tok)
    let cell = newRstNode(rnTableDataCell)
    cell.span = if spans.len == 0: 0 else: spans[uCell]
    cell.add(parseDoc(q))
    result.add(cell)
  # skip the spanning underline if it has not been passed yet
  if afterSpan > p.idx:
    p.idx = afterSpan
|
|
|
|
proc parseSimpleTable(p: var RstParser): PRstNode =
  ## Parses an RST simple table beginning at its top adornment line and
  ## returns it as `rnTable` whose sons are the table rows.
  var cols: RstCols
  result = newRstNodeA(p, rnTable)
  let afterSpec = getColumns(p, cols, p.idx)
  let colChar = currentTok(p).symbol[0]
  checkColumns(p, cols)
  p.idx = afterSpec
  result.colCount = cols.len
  while true:
    if currentTok(p).kind == tkAdornment:
      checkColumns(p, cols)
      p.idx = tokenAfterNewline(p)
      if currentTok(p).kind in {tkEof, tkIndent}:
        # skip last adornment line:
        break
      if result.sons.len > 0: result.sons[^1].endsHeader = true
      # fix rnTableDataCell -> rnTableHeaderCell for previous table rows:
      for tabRow in result.sons:
        for nCell in 0 ..< tabRow.len:
          let old = tabRow.sons[nCell]
          tabRow.sons[nCell] = PRstNode(kind: rnTableHeaderCell,
                                        sons: old.sons,
                                        span: old.span, anchor: old.anchor)
    if currentTok(p).kind == tkEof: break
    result.add parseSimpleTableRow(p, cols, colChar)
|
|
|
|
proc readTableRow(p: var RstParser): ColSeq =
  ## Scans one line of a Markdown table and returns the token index range
  ## (`first` inclusive, `last` exclusive) of each of its cells. Cells are
  ## delimited by `|` tokens that are not preceded by a backslash token.
  ## On return the parser is positioned at the token after the newline.
  if currentTok(p).symbol == "|": inc p.idx  # leading pipe
  while currentTok(p).kind notin {tkIndent, tkEof}:
    let cellStart = p.idx
    # advance to the next unescaped `|` or to the end of the line
    while currentTok(p).kind notin {tkIndent, tkEof} and
        not (currentTok(p).symbol == "|" and prevTok(p).symbol != "\\"):
      inc p.idx
    result.add((first: cellStart, last: p.idx))
    if currentTok(p).kind in {tkIndent, tkEof}: break
    inc p.idx  # skip the delimiting `|`
  p.idx = tokenAfterNewline(p)
|
|
|
|
proc getColContents(p: var RstParser, colLim: ColumnLimits): string =
  ## Concatenates the symbols of tokens `colLim.first ..< colLim.last` and
  ## returns the text with leading and trailing whitespace removed.
  var joined = ""
  for tokIdx in colLim.first ..< colLim.last:
    joined.add(p.tok[tokIdx].symbol)
  result = strip(joined)
|
|
|
|
proc isValidDelimiterRow(p: var RstParser, colNum: int): bool =
  ## Reads the next table line and checks that it is a Markdown delimiter
  ## row: exactly `colNum` cells, each at least 3 characters long and
  ## starting with `--` or `:-`.
  let cells = readTableRow(p)
  if cells.len != colNum: return false
  result = true
  for cell in cells:
    let text = getColContents(p, cell)
    let dashed = text.startsWith("--") or text.startsWith(":-")
    if text.len < 3 or not dashed:
      return false
|
|
|
|
proc parseMarkdownTable(p: var RstParser): PRstNode =
  ## Parses a Markdown (pipe) table: a header row, a delimiter row and any
  ## number of data rows that begin with `|`. Reports
  ## `meMarkdownIllformedTable` when the delimiter row is not valid.
  var cellParser: RstParser  # sub-parser, re-initialized for every cell
  result = newRstNodeA(p, rnMarkdownTable)

  proc parseRow(p: var RstParser, cellKind: RstNodeKind, table: PRstNode) =
    ## Parses one table line into an `rnTableRow` of `cellKind` cells and
    ## appends it to `table`.
    var cells = readTableRow(p)
    if table.colCount == 0:
      table.colCount = cells.len  # table header
    elif cells.len < table.colCount:
      cells.setLen(table.colCount)  # pad short rows with empty cells
    let rowNode = newRstNode(rnTableRow)
    for j in 0 ..< table.colCount:
      let cellNode = newRstNode(cellKind)
      initParser(cellParser, p.s)
      cellParser.col = p.col
      cellParser.line = currentTok(p).line - 1
      getTokens(getColContents(p, cells[j]), cellParser.tok)
      cellNode.add(parseDoc(cellParser))
      rowNode.add(cellNode)
    table.add(rowNode)

  parseRow(p, rnTableHeaderCell, result)
  if not isValidDelimiterRow(p, result.colCount):
    rstMessage(p, meMarkdownIllformedTable)
  while predNL(p) and currentTok(p).symbol == "|":
    parseRow(p, rnTableDataCell, result)
|
|
|
|
proc parseTransition(p: var RstParser): PRstNode =
  ## Creates an `rnTransition` node, consumes the current token and then
  ## up to two indent tokens that follow it.
  result = newRstNodeA(p, rnTransition)
  inc p.idx
  for _ in 1 .. 2:
    if currentTok(p).kind == tkIndent: inc p.idx
|
|
|
|
proc parseBulletList(p: var RstParser): PRstNode =
  ## Parses a bullet list whose first bullet is the current token.
  ## Returns `nil` if the bullet is not followed by whitespace.
  if nextTok(p).kind != tkWhite: return nil
  let
    bullet = currentTok(p).symbol
    bulletCol = currentTok(p).col
  result = newRstNodeA(p, rnBulletList)
  # item content is indented to where the text after the bullet starts
  pushInd(p, p.tok[p.idx + 2].col)
  inc p.idx, 2
  while true:
    let item = newRstNode(rnBulletItem)
    parseSection(p, item)
    result.add(item)
    # the list goes on only with the same bullet at the same column
    let anotherItem =
      currentTok(p).kind == tkIndent and currentTok(p).ival == bulletCol and
      nextTok(p).symbol == bullet and
      p.tok[p.idx + 2].kind == tkWhite
    if not anotherItem: break
    inc p.idx, 3
  popInd(p)
|
|
|
|
proc parseOptionList(p: var RstParser): PRstNode =
  ## Parses a list of command line options with their descriptions. Each
  ## item is an `rnOptionListItem` holding an `rnOptionGroup` and an
  ## `rnDescription`; items are numbered through `order`, starting at 1.
  result = newRstNodeA(p, rnOptionList)
  let listCol = currentTok(p).col
  var order = 1
  while true:
    if currentTok(p).col != listCol or not isOptionList(p):
      if currentTok(p).kind != tkEof: dec p.idx  # back to tkIndent
      break
    var
      optGroup = newRstNode(rnOptionGroup)
      description = newRstNode(rnDescription)
    if match(p, p.idx, "//w"): inc p.idx
    # the option group lasts until whitespace of more than 1 character
    while currentTok(p).kind notin {tkIndent, tkEof}:
      if currentTok(p).kind == tkWhite and currentTok(p).symbol.len > 1:
        inc p.idx
        break
      optGroup.add(newLeaf(p))
      inc p.idx
    let afterNl = tokenAfterNewline(p)
    if afterNl > 0 and p.tok[afterNl - 1].kind == tkIndent and
        p.tok[afterNl - 1].ival > currInd(p):
      # the description continues on the following, deeper indented lines
      pushInd(p, p.tok[afterNl - 1].ival)
      parseSection(p, description)
      popInd(p)
    else:
      parseLine(p, description)
    while currentTok(p).kind == tkIndent: inc p.idx
    let listItem = newRstNode(rnOptionListItem)
    listItem.add(optGroup)
    listItem.add(description)
    listItem.order = order
    inc order
    result.add(listItem)
|
|
|
|
proc parseMdDefinitionList(p: var RstParser): PRstNode =
  ## Parses (Pandoc/kramdown/PHPextra) Markdown definition lists.
  ## Every `rnDefItem` consists of one `rnDefName` followed by one or more
  ## `rnDefBody` nodes.
  result = newRstNodeA(p, rnMdDefList)
  let termCol = currentTok(p).col
  while true:
    let item = newRstNode(rnDefItem)
    var term = newRstNode(rnDefName)
    parseLine(p, term)
    skipNewlines(p)
    inc p.idx, 2  # skip ":" and space
    item.add(term)
    var moreBodies = true
    while moreBodies:
      let body = newRstNode(rnDefBody)
      let indent = getMdBlockIndent(p)
      pushInd(p, indent)
      parseSection(p, body)
      popInd(p)
      item.add(body)
      let next = skipNewlines(p, p.idx)
      moreBodies = isMdDefBody(p, next, termCol)
      if moreBodies:  # parse next definition body
        p.idx = next + 2  # skip ":" and space
    result.add(item)
    let next = skipNewlines(p, p.idx)
    if p.tok[next].col != termCol or not isMdDefListItem(p, next):
      break
    p.idx = next  # parse next item
|
|
|
|
proc parseDefinitionList(p: var RstParser): PRstNode =
  ## Parses an RST definition list: terms, each followed by a deeper
  ## indented body. Returns `nil` if the current position does not start
  ## such a list or if no item could be parsed.
  result = nil
  var j = tokenAfterNewline(p) - 1
  let startsDefList =
    j >= 1 and p.tok[j].kind == tkIndent and
    p.tok[j].ival > currInd(p) and p.tok[j - 1].symbol != "::"
  if not startsDefList: return
  let termCol = currentTok(p).col
  result = newRstNodeA(p, rnDefList)
  while true:
    if isOptionList(p):
      break  # option list has priority over def.list
    let termStart = p.idx
    var term = newRstNode(rnDefName)
    parseLine(p, term)
    let bodyFollows =
      currentTok(p).kind == tkIndent and
      currentTok(p).ival > currInd(p) and
      nextTok(p).symbol != "::" and
      nextTok(p).kind notin {tkIndent, tkEof}
    if not bodyFollows:
      p.idx = termStart  # not a definition item: rewind to its beginning
      break
    pushInd(p, currentTok(p).ival)
    let body = newRstNode(rnDefBody)
    parseSection(p, body)
    let item = newRstNode(rnDefItem)
    item.add(term)
    item.add(body)
    result.add(item)
    popInd(p)
    if currentTok(p).kind == tkIndent and currentTok(p).ival == termCol:
      inc p.idx
      # look ahead whether one more term with an indented body follows
      j = tokenAfterNewline(p) - 1
      let anotherItem =
        j >= 1 and p.tok[j].kind == tkIndent and p.tok[j].ival > termCol and
        p.tok[j-1].symbol != "::" and p.tok[j+1].kind != tkIndent
      if not anotherItem: break
  if result.len == 0: result = nil
|
|
|
|
proc parseEnumList(p: var RstParser): PRstNode =
  ## Parses an enumerated list. Items are accepted only while their
  ## enumerators form a sequence (each one is the successor of the previous
  ## one, or an auto-enumerator). Returns `nil` when the line after the
  ## first enumerator is not indented enough for a list.
  const
    # enumerator patterns, where 'x' means letter and 'n' means number
    wildcards: array[0..5, string] = ["(n) ", "n) ", "n. ",
                                      "(x) ", "x) ", "x. "]
    # number of tokens
    wildToken: array[0..5, int] = [4, 3, 3, 4, 3, 3]
    # position of enumeration sequence (number/letter) in enumerator
    wildIndex: array[0..5, int] = [1, 0, 0, 1, 0, 0]
  let col = currentTok(p).col
  var w = 0
  while w < wildcards.len and not match(p, p.idx, wildcards[w]):
    inc w
  assert w < wildcards.len

  proc checkAfterNewline(p: RstParser, report: bool): bool =
    ## If no indentation on the next line then parse as a normal paragraph
    ## according to the RST spec. And report a warning with suggestions
    let j = tokenAfterNewline(p, start=p.idx+1)
    let requiredIndent = p.tok[p.idx+wildToken[w]].col
    let underIndented =
      p.tok[j].kind notin {tkIndent, tkEof} and
      p.tok[j].col < requiredIndent and
      (p.tok[j].col > col or
        (p.tok[j].col == col and not match(p, j, wildcards[w])))
    if underIndented and report:
      let n = p.line + p.tok[j].line
      let msg = "\n" & """
        not enough indentation on line $2
          (should be at column $3 if it's a continuation of enum. list),
        or no blank line after line $1 (if it should be the next paragraph),
        or no escaping \ at the beginning of line $1
          (if lines $1..$2 are a normal paragraph, not enum. list)""".dedent
      let c = p.col + requiredIndent + ColRstOffset
      rstMessage(p, mwRstStyle, msg % [$(n-1), $n, $c],
                 p.tok[j].line, p.tok[j].col)
    result = not underIndented

  if not checkAfterNewline(p, report = true):
    return nil
  result = newRstNodeA(p, rnEnumList)
  let autoEnums = if roSupportMarkdown in p.s.options: @["#", "1"] else: @["#"]
  var prevAE = ""  # so as not allow mixing auto-enumerators `1` and `#`
  var curEnum = 1
  for i in 0 ..< wildToken[w]-1:  # add first enumerator with (, ), and .
    let sym = p.tok[p.idx + i].symbol
    if sym == "#":
      prevAE = "#"
      result.labelFmt.add "1"
    else:
      result.labelFmt.add sym
  var prevEnum = p.tok[p.idx + wildIndex[w]].symbol
  inc p.idx, wildToken[w]
  while true:
    let item = newRstNode(rnEnumItem)
    pushInd(p, currentTok(p).col)
    parseSection(p, item)
    popInd(p)
    result.add(item)
    let nextItemFollows =
      currentTok(p).kind == tkIndent and currentTok(p).ival == col and
      match(p, p.idx+1, wildcards[w])
    if not nextItemFollows: break
    # don't report to avoid duplication of warning since for
    # subsequent enum. items parseEnumList will be called second time:
    if not checkAfterNewline(p, report = false): break
    let enumerator = p.tok[p.idx + 1 + wildIndex[w]].symbol
    # check that it's in sequence: enumerator == next(prevEnum)
    if "n" in wildcards[w]:  # arabic numeral
      let prevEnumI = try: parseInt(prevEnum) except ValueError: 1
      if enumerator in autoEnums:
        if prevAE != "" and enumerator != prevAE: break
        prevAE = enumerator
        curEnum = prevEnumI + 1
      else:
        curEnum = (try: parseInt(enumerator) except ValueError: 1)
      if curEnum - prevEnumI != 1: break
      prevEnum = enumerator
    else:  # a..z
      let prevEnumI = ord(prevEnum[0])
      curEnum =
        if enumerator == "#": prevEnumI + 1
        else: ord(enumerator[0])
      if curEnum - prevEnumI != 1: break
      prevEnum = $chr(curEnum)
    inc p.idx, 1 + wildToken[w]
|
|
|
|
proc prefix(ftnType: FootnoteType): string =
  ## Returns the anchor prefix that corresponds to the footnote type.
  case ftnType
  of fnManualNumber, fnAutoNumberLabel: "footnote-"
  of fnAutoNumber: "footnoteauto-"
  of fnAutoSymbol: "footnotesym-"
  of fnCitation: "citation-"
|
|
|
|
proc parseFootnote(p: var RstParser): PRstNode {.gcsafe.} =
  ## Parses footnotes and citations, always returns 2 sons:
  ##
  ## 1) footnote label, always containing rnInner with 1 or more sons
  ## 2) footnote body, which may be nil
  ##
  ## Returns `nil` (with the parser position restored) when no footnote
  ## label could be parsed.
  if isRst(p):
    inc p.idx  # skip space after `..`
  let label = parseFootnoteName(p, reference=false)
  if label == nil:
    if isRst(p):
      dec p.idx
    return nil
  result = newRstNode(rnFootnote)
  result.add label
  let (fnType, i) = getFootnoteType(p.s, label)
  var anchor = fnType.prefix
  # register the footnote and complete its anchor according to its type
  case fnType
  of fnManualNumber:
    addFootnoteNumManual(p, i)
    anchor.add $i
  of fnAutoNumber, fnAutoNumberLabel:
    let name = rstnodeToRefname(label)
    addFootnoteNumAuto(p, name)
    if fnType == fnAutoNumberLabel:
      anchor.add name
    else:  # fnAutoNumber
      result.order = p.s.lineFootnoteNum.len
      anchor.add $result.order
  of fnAutoSymbol:
    addFootnoteSymAuto(p)
    let symCount = p.s.lineFootnoteSym.len
    result.order = symCount
    anchor.add $symCount
  of fnCitation:
    anchor.add rstnodeToRefname(label)
  addAnchorRst(p, anchor, target = result, anchorType = footnoteAnchor)
  result.anchor = anchor
  if currentTok(p).kind == tkWhite: inc p.idx
  discard parseBlockContent(p, result, parseSectionWrapper)
  # keep the promise of 2 sons even when there is no body
  if result.len < 2:
    result.add nil
|
|
|
|
proc sonKind(father: PRstNode, i: int): RstNodeKind =
  ## Kind of the `i`-th son of `father`, or `rnLeaf` when `father` has no
  ## son with that index.
  if i < father.len: father.sons[i].kind
  else: rnLeaf
|
|
|
|
proc parseSection(p: var RstParser, result: PRstNode) =
  ## parse top-level RST elements: sections, transitions and body elements.
  ## The parsed elements are appended to `result`.
  while true:
    var leave = false
    assert(p.idx >= 0)
    # Process indentation changes before the next element:
    while currentTok(p).kind == tkIndent:
      let indent = currentTok(p).ival
      if currInd(p) == indent:
        inc p.idx
      elif indent > currInd(p):
        if roPreferMarkdown in p.s.options:  # Markdown => normal paragraphs
          if indent - currInd(p) >= 4:
            result.add parseLiteralBlock(p)
          else:
            pushInd(p, indent)
            parseSection(p, result)
            popInd(p)
        else:  # RST mode => block quotes
          pushInd(p, indent)
          let quote = newRstNodeA(p, rnBlockQuote)
          parseSection(p, quote)
          result.add(quote)
          popInd(p)
      else:
        # indentation got smaller: this section has ended
        while currentTok(p).kind != tkEof and nextTok(p).kind == tkIndent:
          inc p.idx  # skip blank lines
        leave = true
        break
    if leave or currentTok(p).kind == tkEof: break
    var node: PRstNode = nil
    let sectionKind = whichSection(p)
    case sectionKind
    of rnLiteralBlock:
      inc p.idx  # skip '::'
      node = parseLiteralBlock(p)
    of rnBulletList: node = parseBulletList(p)
    of rnLineBlock: node = parseLineBlock(p)
    of rnMarkdownBlockQuote: node = parseMarkdownBlockQuote(p)
    of rnDirective: node = parseDotDot(p)
    of rnFootnote: node = parseFootnote(p)
    of rnEnumList: node = parseEnumList(p)
    of rnLeaf: rstMessage(p, meNewSectionExpected, "(syntax error)")
    of rnParagraph: discard
    of rnDefList: node = parseDefinitionList(p)
    of rnMdDefList: node = parseMdDefinitionList(p)
    of rnFieldList:
      if p.idx > 0: dec p.idx
      node = parseFields(p)
    of rnTransition: node = parseTransition(p)
    of rnHeadline, rnMarkdownHeadline: node = parseHeadline(p)
    of rnOverline: node = parseOverline(p)
    of rnTable: node = parseSimpleTable(p)
    of rnMarkdownTable: node = parseMarkdownTable(p)
    of rnOptionList: node = parseOptionList(p)
    else:
      #InternalError("rst.parseSection()")
      discard
    # whatever was not recognized (except a directive) is a paragraph
    if node == nil and sectionKind != rnDirective:
      node = newRstNodeA(p, rnParagraph)
      parseParagraph(p, node)
    result.addIfNotNil(node)
  # a leading paragraph that is not followed by another paragraph is
  # turned into `rnInner`
  if sonKind(result, 0) == rnParagraph and sonKind(result, 1) != rnParagraph:
    let first = result.sons[0]
    result.sons[0] = newRstNode(rnInner, first.sons, anchor=first.anchor)
|
|
|
|
proc parseDoc(p: var RstParser): PRstNode =
  ## Parses all tokens of `p` with `parseSectionWrapper` and reports
  ## `meGeneralParseError` if the end of input was not reached.
  result = parseSectionWrapper(p)
  let reachedEof = currentTok(p).kind == tkEof
  if not reachedEof:
    rstMessage(p, meGeneralParseError)
|
|
|
|
type
  DirFlag = enum
    ## Flags that control which parts of a directive `parseDirective` reads.
    hasArg,      ## the directive has an argument
    hasOptions,  ## the directive may be followed by a field list of options
    argIsFile,   ## the argument is collected from word, punctuation,
                 ## adornment and "other" tokens
    argIsWord    ## the argument is a single word
  DirFlags = set[DirFlag]
|
|
|
|
proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags): PRstNode =
  ## Parses arguments and options for a directive block.
  ##
  ## A directive block will always have three sons: the arguments for the
  ## directive (rnDirArg), the options (rnFieldList) and the directive
  ## content block. This proc parses the two first nodes, the 3rd is left to
  ## the outer `parseDirective` call.
  ##
  ## Both rnDirArg and rnFieldList children nodes might be nil, so you need to
  ## check them before accessing.
  result = newRstNodeA(p, k)
  if k == rnCodeBlock: result.info = lineInfo(p)

  # 1st son: the argument
  var args: PRstNode = nil
  if hasArg in flags:
    args = newRstNode(rnDirArg)
    if argIsFile in flags:
      while currentTok(p).kind in {tkWord, tkOther, tkPunct, tkAdornment}:
        args.add(newLeaf(p))
        inc p.idx
    elif argIsWord in flags:
      while currentTok(p).kind == tkWhite: inc p.idx
      if currentTok(p).kind == tkWord:
        args.add(newLeaf(p))
        inc p.idx
      else:
        args = nil
    else:
      parseLine(p, args)
  result.add(args)

  # 2nd son: the options, an indented field list
  var options: PRstNode = nil
  if hasOptions in flags and
      currentTok(p).kind == tkIndent and currentTok(p).ival > currInd(p) and
      nextTok(p).symbol == ":":
    pushInd(p, currentTok(p).ival)
    options = parseFields(p)
    popInd(p)
  result.add(options)
|
|
|
|
proc parseDirective(p: var RstParser, k: RstNodeKind, flags: DirFlags,
                    contentParser: SectionParser): PRstNode =
  ## A helper proc that does main work for specific directive procs.
  ## Always returns a generic rnDirective tree with these 3 children:
  ##
  ## 1) rnDirArg
  ## 2) rnFieldList
  ## 3) a node returned by `contentParser`.
  ##
  ## .. warning:: Any of the 3 children may be nil.
  result = parseDirective(p, k, flags)
  # `parseBlockContent` updates `result` itself when it succeeds
  let contentAdded =
    not isNil(contentParser) and parseBlockContent(p, result, contentParser)
  if not contentAdded:
    result.add(PRstNode(nil))
|
|
|
|
proc parseDirBody(p: var RstParser, contentParser: SectionParser): PRstNode =
  ## Runs `contentParser` at the indentation of the current token if
  ## `indFollows(p)` holds; returns `nil` otherwise.
  if not indFollows(p): return nil
  pushInd(p, currentTok(p).ival)
  result = contentParser(p)
  popInd(p)
|
|
|
|
proc dirInclude(p: var RstParser): PRstNode =
  ## Handles the `include` directive: reads the file given as the argument
  ## and returns its content either as a literal block (option `literal`)
  ## or parsed as a document. Reports `meCannotOpenFile` and returns `nil`
  ## when the file cannot be found.
  ##
  ## The following options are recognized:
  ##
  ## :start-after: text to find in the external data file
  ##
  ##     Only the content after the first occurrence of the specified
  ##     text will be included. If text is not found inclusion will
  ##     start from beginning of the file
  ##
  ## :end-before: text to find in the external data file
  ##
  ##     Only the content before the first occurrence of the specified
  ##     text (but after any after text) will be included. If text is
  ##     not found inclusion will happen until the end of the file.
  #literal : flag (empty)
  #  The entire included text is inserted into the document as a single
  #  literal block (useful for program listings).
  #encoding : name of text encoding
  #  The text encoding of the external data file. Defaults to the document's
  #  encoding (if specified).
  #
  result = nil
  let n = parseDirective(p, rnDirective, {hasArg, argIsFile, hasOptions}, nil)
  let filename = strip(addNodes(n.sons[0]))
  let path = p.findRelativeFile(filename)
  if path == "":
    rstMessage(p, meCannotOpenFile, filename)
  else:
    # XXX: error handling; recursive file inclusion!
    if getFieldValue(n, "literal") != "":
      result = newRstNode(rnLiteralBlock)
      result.add newLeaf(readFile(path))
    else:
      let inputString = readFile(path)
      let startPosition =
        block:
          let searchFor = n.getFieldValue("start-after").strip()
          if searchFor != "":
            let pos = inputString.find(searchFor)
            if pos != -1: pos + searchFor.len
            else: 0
          else:
            0

      let endPosition =
        block:
          let searchFor = n.getFieldValue("end-before").strip()
          if searchFor != "":
            let pos = inputString.find(searchFor, start = startPosition)
            if pos != -1: pos - 1
            # marker is absent: include everything up to the end of the
            # file, as documented above
            else: inputString.len - 1
          else:
            inputString.len - 1

      var q: RstParser
      initParser(q, p.s)
      # messages produced while parsing the included text should refer to
      # the included file, so switch the current file for that time
      let saveFileIdx = p.s.currFileIdx
      setCurrFilename(p.s, path)
      getTokens(
        inputString[startPosition..endPosition],
        q.tok)
      # workaround a GCC bug; more like the interior pointer bug?
      #if find(q.tok[high(q.tok)].symbol, "\0\x01\x02") > 0:
      #  InternalError("Too many binary zeros in include file")
      result = parseDoc(q)
      p.s.currFileIdx = saveFileIdx
|
|
|
|
proc dirCodeBlock(p: var RstParser, nimExtension = false): PRstNode =
  ## Parses a code block.
  ##
  ## Code blocks are rnDirective trees with a `kind` of rnCodeBlock. See the
  ## description of ``parseDirective`` for further structure information.
  ##
  ## Code blocks can come in two forms, the standard `code directive
  ## <http://docutils.sourceforge.net/docs/ref/rst/directives.html#code>`_ and
  ## the nim extension ``.. code-block::``. If the block is an extension, we
  ## want the default language syntax highlighting to be Nim, so we create a
  ## fake internal field to communicate with the generator. The field is named
  ## ``default-language``, which is unlikely to collide with a field specified
  ## by any random rst input file.
  ##
  ## As an extension this proc will process the ``file`` extension field and if
  ## present will replace the code block with the contents of the referenced
  ## file. This behaviour is disabled in sandboxed mode and can be re-enabled
  ## with the `roSandboxDisabled` flag.
  const flags = {hasArg, hasOptions}
  result = parseDirective(p, rnCodeBlock, flags, parseLiteralBlock)
  mayLoadFile(p, result)

  # Extend the field block if we are using our custom Nim extension.
  if nimExtension: defaultCodeLangNim(p, result)
|
|
|
|
proc dirContainer(p: var RstParser): PRstNode =
  ## Parses the `container` directive: an argument plus a body parsed by
  ## `parseSectionWrapper`.
  const flags = {hasArg}
  result = parseDirective(p, rnContainer, flags, parseSectionWrapper)
  assert(result.len == 3)
|
|
|
|
proc dirImage(p: var RstParser): PRstNode =
  ## Parses the `image` directive: a file argument and options, no body.
  parseDirective(p, rnImage, {hasArg, argIsFile, hasOptions}, nil)
|
|
|
|
proc dirFigure(p: var RstParser): PRstNode =
  ## Parses the `figure` directive: a file argument, options and a body
  ## parsed by `parseSectionWrapper`.
  const flags = {hasArg, argIsFile, hasOptions}
  parseDirective(p, rnFigure, flags, parseSectionWrapper)
|
|
|
|
proc dirTitle(p: var RstParser): PRstNode =
  ## Parses the `title` directive: an argument only, no body.
  parseDirective(p, rnTitle, {hasArg}, nil)
|
|
|
|
proc dirContents(p: var RstParser): PRstNode =
  ## Parses the `contents` directive and records in the shared state
  ## (`p.s.hasToc`) that the document has a table of contents.
  const flags = {hasArg}
  result = parseDirective(p, rnContents, flags, nil)
  p.s.hasToc = true
|
|
|
|
proc dirIndex(p: var RstParser): PRstNode =
  ## Parses the `index` directive: no argument and no options, only a body
  ## parsed by `parseSectionWrapper`.
  parseDirective(p, rnIndex, {}, parseSectionWrapper)
|
|
|
|
proc dirAdmonition(p: var RstParser, d: string): PRstNode =
  ## Parses an admonition directive; its name `d` is stored in `adType` of
  ## the returned node.
  const noFlags: DirFlags = {}
  result = parseDirective(p, rnAdmonition, noFlags, parseSectionWrapper)
  result.adType = d
|
|
|
|
proc dirDefaultRole(p: var RstParser): PRstNode =
  ## Parses the `default-role` directive and updates the current role
  ## (`p.s.currRole`, `p.s.currRoleKind`). An empty argument selects the
  ## role returned by `defaultRole` for the active options.
  result = parseDirective(p, rnDefaultRole, {hasArg}, nil)
  let arg = result.sons[0]
  if arg.len == 0:
    p.s.currRole = defaultRole(p.s.options)
  else:
    assert arg.sons[0].kind == rnLeaf
    p.s.currRole = arg.sons[0].text
  p.s.currRoleKind = whichRole(p, p.s.currRole)
|
|
|
|
proc dirRole(p: var RstParser): PRstNode =
  ## Parses the `role` directive. Warns with `mwUnsupportedLanguage` when
  ## its `language` option names a language unknown to the highlighter.
  result = parseDirective(p, rnDirective, {hasArg, hasOptions}, nil)
  # just check that language is supported, TODO: real role association
  let lang = strip(getFieldValue(result, "language"))
  let unsupported = lang != "" and getSourceLanguage(lang) == langNone
  if unsupported:
    rstMessage(p, mwUnsupportedLanguage, lang)
|
|
|
|
proc dirRawAux(p: var RstParser, result: var PRstNode, kind: RstNodeKind,
               contentParser: SectionParser) =
  ## Replaces `result` by a node of `kind`. If the directive has the `file`
  ## option the node contains the content of that file (a missing file is
  ## reported with `meCannotOpenFile` and `result` is left as it was);
  ## otherwise the node takes over the sons of `result` and gets the
  ## directive body parsed by `contentParser` appended.
  let filename = getFieldValue(result, "file")
  if filename.len == 0:
    result = newRstNode(kind, result.sons)
    result.add(parseDirBody(p, contentParser))
    return
  let path = p.findRelativeFile(filename)
  if path.len == 0:
    rstMessage(p, meCannotOpenFile, filename)
  else:
    let content = readFile(path)
    result = newRstNode(kind)
    result.add newLeaf(content)
|
|
|
|
proc dirRaw(p: var RstParser): PRstNode =
  ## Parses the `raw` directive. The argument selects the output format
  ## (`html` or `latex`, compared case-insensitively); any other format is
  ## reported with `meInvalidDirective`.
  #
  #The following options are recognized:
  #
  #file : string (newlines removed)
  #    The local filesystem path of a raw data file to be included.
  #
  # html
  # latex
  result = parseDirective(p, rnDirective, {hasOptions, hasArg, argIsWord})
  if result.sons[0] == nil:
    # no format was given
    dirRawAux(p, result, rnRaw, parseSectionWrapper)
    return
  let format = result.sons[0].sons[0].text
  case toLowerAscii(format)
  of "html":
    dirRawAux(p, result, rnRawHtml, parseLiteralBlock)
  of "latex":
    dirRawAux(p, result, rnRawLatex, parseLiteralBlock)
  else:
    rstMessage(p, meInvalidDirective, format)
|
|
|
|
proc dirImportdoc(p: var RstParser): PRstNode =
  ## Parses the `importdoc` directive. Its body is a comma-separated list
  ## of file names; each of them (with all whitespace removed) is
  ## registered in `p.s.idxImports`.
  result = parseDirective(p, rnDirective, {}, parseLiteralBlock)
  let body = result.sons[2]
  assert body.kind == rnLiteralBlock
  assert body.sons[0].kind == rnLeaf
  for rawName in split(body.sons[0].text, seps = {','}):
    let cleanName = rawName.split.join("")  # drop all whitespace
    p.s.idxImports[cleanName] = ImportdocInfo(fromInfo: lineInfo(p))
|
|
|
|
proc selectDir(p: var RstParser, d: string): PRstNode =
  ## Dispatches the directive named `d` to the proc that parses it.
  ## In sandboxed mode (no `roSandboxDisabled`) directives missing from
  ## `SandboxDirAllowlist` are reported with `meSandboxedDirective`;
  ## unknown directives are reported with `meInvalidDirective`.
  result = nil
  let tok = p.tok[p.idx-2]  # report on directive in ".. directive::"
  if roSandboxDisabled notin p.s.options and d notin SandboxDirAllowlist:
    rstMessage(p, meSandboxedDirective, d, tok.line, tok.col)

  case d
  of "admonition", "attention", "caution", "danger", "error", "hint",
     "important", "note", "tip", "warning":
    result = dirAdmonition(p, d)
  of "code": result = dirCodeBlock(p)
  of "code-block": result = dirCodeBlock(p, nimExtension = true)
  of "container": result = dirContainer(p)
  of "contents": result = dirContents(p)
  of "default-role": result = dirDefaultRole(p)
  of "figure": result = dirFigure(p)
  of "image": result = dirImage(p)
  of "importdoc": result = dirImportdoc(p)
  of "include": result = dirInclude(p)
  of "index": result = dirIndex(p)
  of "raw":
    # the `raw` directive has to be enabled explicitly
    if roSupportRawDirective in p.s.options:
      result = dirRaw(p)
    else:
      rstMessage(p, meInvalidDirective, d)
  of "role": result = dirRole(p)
  of "title": result = dirTitle(p)
  else:
    rstMessage(p, meInvalidDirective, d, tok.line, tok.col)
|
|
|
|
proc parseDotDot(p: var RstParser): PRstNode =
  ## Parses "explicit markup blocks", i.e. constructs that start with `..`:
  ## directives, hyperlink targets, substitution definitions, footnotes
  ## and, when nothing else matches, comments.
  result = nil
  var n: PRstNode  # to store result, workaround for bug 16855
  let col = currentTok(p).col
  inc p.idx
  let d = getDirective(p)
  if d != "":
    pushInd(p, col)
    result = selectDir(p, d)
    popInd(p)
  elif match(p, p.idx, " _"):
    # hyperlink target:
    inc p.idx, 2
    let backquoted = currentTok(p).symbol == "`"
    if backquoted: inc p.idx
    let ending = if backquoted: "`" else: ":"
    let target = getReferenceName(p, ending)
    if backquoted:
      # a backquoted name still has to be followed by a colon
      if currentTok(p).symbol == ":":
        inc p.idx
      else:
        rstMessage(p, meExpected, ":")
    if currentTok(p).kind == tkWhite: inc p.idx
    let link = untilEol(p)
    if len(link) == 0:  # set internal anchor
      p.curAnchors.add ManualAnchor(
        alias: linkName(target), anchor: rstnodeToRefname(target),
        info: prevLineInfo(p)
      )
    else:  # external hyperlink
      setRef(p, rstnodeToRefname(target), link, refType=hyperlinkAlias)
  elif match(p, p.idx, " |"):
    # substitution definitions:
    inc p.idx, 2
    let subName = getReferenceName(p, "|")
    var replacement: PRstNode
    if currentTok(p).kind == tkWhite: inc p.idx
    if cmpIgnoreStyle(currentTok(p).symbol, "replace") == 0:
      inc p.idx
      expect(p, "::")
      replacement = untilEol(p)
    elif cmpIgnoreStyle(currentTok(p).symbol, "image") == 0:
      inc p.idx
      replacement = dirImage(p)
    else:
      rstMessage(p, meInvalidDirective, currentTok(p).symbol)
    setSub(p, addNodes(subName), replacement)
  elif match(p, p.idx, " [") and
      (n = parseFootnote(p); n != nil):
    result = n
  else:
    result = parseComment(p, col)
|
|
|
|
proc rstParsePass1*(fragment: string,
                    line, column: int,
                    sharedState: PRstSharedState): PRstNode =
  ## Runs pass 1 on an RST `fragment`: tokenizes it and parses the tokens
  ## into a document tree, using `line` and `column` as the parser's
  ## starting position.
  ##
  ## The returned tree should be further processed by
  ## preparePass2_ and resolveSubs_ (which is pass 2).
  var parser: RstParser
  initParser(parser, sharedState)
  parser.line = line
  parser.col = column
  getTokens(fragment, parser.tok)
  parseDoc(parser)
|
|
|
|
proc extractLinkEnd(x: string): string =
  ## From links like `path/to/file.html#/%` extract `file.html#/%`.
  ##
  ## Only slashes located at or before the first `#` count as path
  ## separators, so slashes inside the fragment part are left alone.
  ## When no such slash exists, `x` is returned unchanged.
  let hashPos = x.find('#')
  # without a '#' the whole string is searched
  let searchEnd = if hashPos < 0: x.len - 1 else: hashPos
  let slashPos = x.rfind('/', start = 0, last = searchEnd)
  if slashPos < 0: x
  else: x[slashPos + 1 .. ^1]
|
|
|
|
proc loadIdxFile(s: var PRstSharedState, origFilename: string) =
  ## Loads the `.idx` file that corresponds to `origFilename` and registers
  ## every entry found there as an external anchor in `s`.
  ##
  ## Markup entries (index roles, headings, markup titles) go through
  ## `addAnchorExtRst`; Nim entries (symbols, groups, Nim titles) go through
  ## `addAnchorNim` with the lowest priority. Mixing both families in one
  ## index, or a family that contradicts the file extension, is reported as
  ## `meInvalidField`. Requires `roSandboxDisabled`.
  doAssert roSandboxDisabled in s.options
  var info: TLineInfo
  info.fileIndex = addFilename(s, origFilename)
  let (dir, basename, ext) = origFilename.splitFile
  if ext notin [".md", ".rst", ".nim", ""]:
    rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo,
               meCannotOpenFile, origFilename & ": unknown extension")
  let idxFilename = dir / basename & ".idx"
  let (idxPath, linkRelPath) = s.findRefFile(idxFilename)
  s.idxImports[origFilename].linkRelPath = linkRelPath

  var
    fileEntries: seq[IndexEntry]
    title: IndexEntry
  try:
    (fileEntries, title) = parseIdxFile(idxPath)
  except IOError:
    rstMessage(s.filenames, s.msgHandler, s.idxImports[origFilename].fromInfo,
               meCannotOpenFile, idxPath)
  except ValueError as e:
    s.msgHandler(idxPath, LineRstInit, ColRstInit, meInvalidField, e.msg)

  var sawMarkupTitle = false  # sanity check to avoid mixing .md <-> .nim
  for entry in fileEntries:
    # Though target .idx already has inside it the path to HTML relative
    # project's root, we won't rely on it and use `linkRelPath` instead.
    let refn = extractLinkEnd(entry.link)
    if entry.kind in {ieMarkupTitle, ieNimTitle}:
      s.idxImports[origFilename].title = entry.keyword
    # select either markup (rst/md) or Nim cases:
    case entry.kind
    of ieIdxRole, ieHeading, ieMarkupTitle:
      if entry.kind == ieMarkupTitle:
        if ext == ".nim":
          rstMessage(s, idxPath, meInvalidField,
                     $ieMarkupTitle & " in supposedly .nim-derived file")
        sawMarkupTitle = true
      info.line = entry.line.uint16
      addAnchorExtRst(s, key = entry.keyword, refn = refn,
                      anchorType = headlineAnchor, info=info)
    of ieNim, ieNimGroup, ieNimTitle:
      if ext in [".md", ".rst"] or sawMarkupTitle:
        rstMessage(s, idxPath, meInvalidField,
                   $entry.kind & " in supposedly markup-derived file")
      s.nimFileImported = true
      var langSym: LangSymbol
      if entry.kind in {ieNim, ieNimTitle}:
        # tokenize the link title with a scratch parser and turn every
        # token into a leaf, so that the result can be fed to `toLangSymbol`
        var scratch: RstParser
        initParser(scratch, s)
        info.line = entry.line.uint16
        setLen(scratch.tok, 0)
        scratch.idx = 0
        getTokens(entry.linkTitle, scratch.tok)
        var leaves = newSeqOfCap[PRstNode](scratch.tok.len)
        for tok in scratch.tok:
          leaves.add newLeaf(tok.symbol)
        let linkTitle = newRstNode(rnInner, leaves)
        langSym = linkTitle.toLangSymbol
      else:  # entry.kind == ieNimGroup
        langSym = langSymbolGroup(kind=entry.linkTitle, name=entry.keyword)
      addAnchorNim(s, external = true, refn = refn, tooltip = entry.linkDesc,
                   langSym = langSym, priority = -4, # lowest
                   info = info, module = info.fileIndex)
  doAssert s.idxImports[origFilename].title != ""
|
|
|
|
proc preparePass2*(s: var PRstSharedState, mainNode: PRstNode, importdoc = true) =
  ## Records titles in node `mainNode` and orders footnotes.
  ##
  ## Unless `importdoc` is `false`, the `.idx` file of every document listed
  ## in `s.idxImports` is loaded afterwards.
  countTitles(s, mainNode)
  fixHeadlines(s)
  orderFootnotes(s)
  if not importdoc:
    return
  for importedFile in keys(s.idxImports):
    loadIdxFile(s, importedFile)
|
|
|
|
proc resolveLink(s: PRstSharedState, n: PRstNode) : PRstNode =
  ## Associates the link alias of `n` with its target and returns a new node
  ## of kind `rnHyperlink`, `rnNimdocRef` or `rnInternalRef` accordingly.
  ##
  ## Candidates are collected from explicit hyperlink references, RST
  ## anchors and (when Nim files are involved) anchors generated from Nim
  ## symbols; the first one after a descending sort wins. More than one
  ## candidate triggers `mwAmbiguousLink`; none at all triggers
  ## `mwBrokenLink`, and `n` itself is returned.
  type LinkDef = object
    ar: AnchorRule
    priority: int
    tooltip: string
    target: PRstNode
    info: TLineInfo
    externFilename: string
      # when external anchor: origin filename where anchor was defined
    isTitle: bool

  proc compareLinks(x, y: LinkDef): int =
    # order by priority first, break ties on the target node
    result = cmp(x.priority, y.priority)
    if result == 0:
      result = cmp(x.target, y.target)

  template getExternFilename(subst: AnchorSubst): string =
    if subst.kind == arExternalRst or
        (subst.kind == arNim and subst.external):
      getFilename(s, subst)
    else: ""

  let isPandoc = n.kind == rnPandocRef
  # link like [desc][alias], otherwise rnRstRef: link like `desc=alias`_
  let desc = if isPandoc: n.sons[0] else: n
  let alias = if isPandoc: n.sons[1] else: n

  var foundLinks: seq[LinkDef]

  # explicitly defined hyperlink references
  let refn = rstnodeToRefname(alias)
  for hyperlink in findRef(s, refn):
    foundLinks.add LinkDef(ar: arHyperlink,
                           priority: refPriority(hyperlink.kind),
                           target: hyperlink.value, info: hyperlink.info,
                           tooltip: "(" & $hyperlink.kind & ")")

  # anchors coming from RST/Markdown markup (internal or imported)
  let substRst = findMainAnchorRst(s, alias.addNodes, n.info)
  for subst in substRst:
    let isInternal = subst.kind == arInternalRst
    let refname =
      if isInternal: subst.target.anchor
      else: subst.refnameExt  # arExternalRst
    let fullRefname =
      if isInternal: refname
      else: s.idxImports[getFilename(s, subst)].linkRelPath & "/" & refname
    let anchorType =
      if isInternal: subst.anchorType
      else: subst.anchorTypeExt  # arExternalRst
    foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority,
                           target: newLeaf(fullRefname),
                           info: subst.info,
                           externFilename: getExternFilename(subst),
                           isTitle: isDocumentationTitle(refname),
                           tooltip: "(" & $anchorType & ")")

  # find anchors automatically generated from Nim symbols
  if roNimFile in s.options or s.nimFileImported:
    let substNim = findMainAnchorNim(s, signature=alias, n.info)
    for subst in substNim:
      let fullRefname =
        if not subst.external: subst.refname
        else:
          s.idxImports[getFilename(s, subst)].linkRelPath &
              "/" & subst.refname
      foundLinks.add LinkDef(ar: subst.kind, priority: subst.priority,
                             target: newLeaf(fullRefname),
                             externFilename: getExternFilename(subst),
                             isTitle: isDocumentationTitle(subst.refname),
                             info: subst.info, tooltip: subst.tooltip)

  foundLinks.sort(cmp = compareLinks, order = Descending)
  let aliasStr = addNodes(alias)

  if foundLinks.len == 0:  # nothing found
    result = n
    rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, aliasStr)
    return

  let best = foundLinks[0]
  let isExternal = best.externFilename != ""
  if isExternal:
    s.idxImports[best.externFilename].used = true
  let kind =
    case best.ar
    of arHyperlink, arExternalRst: rnHyperlink
    of arNim:
      if isExternal: rnHyperlink
      else: rnNimdocRef
    else: rnInternalRef
  result = newRstNode(kind)
  let documentName =  # filename without ext for `.nim`, title for `.md`
    if best.ar == arNim:
      changeFileExt(best.externFilename.extractFilename, "")
    elif isExternal:
      s.idxImports[best.externFilename].title
    else: best.externFilename.extractFilename
  let linkText =
    if not isExternal:
      newRstNode(rnInner, desc.sons)
    elif best.isTitle:
      newLeaf(addNodes(desc))
    else:
      newLeaf(documentName & ": " & addNodes(desc))
  result.sons = @[linkText, best.target]
  if kind == rnNimdocRef:
    result.tooltip = best.tooltip

  if foundLinks.len > 1:  # report ambiguous link
    var clashes = newSeq[string]()
    for link in foundLinks:
      var entry = " "
      if s.filenames.len > 1:
        entry.add getFilename(s.filenames, link.info.fileIndex)
      let lineNo = link.info.line
      let colNo = link.info.col + ColRstOffset
      entry.add "($1, $2): $3" % [$lineNo, $colNo, link.tooltip]
      clashes.add entry
    rstMessage(s.filenames, s.msgHandler, n.info, mwAmbiguousLink,
               "`$1`\n clash:\n$2" % [
                  aliasStr, clashes.join("\n")])
|
|
|
|
proc resolveSubs*(s: PRstSharedState, n: PRstNode): PRstNode =
  ## Makes pass 2 of RST parsing.
  ## Resolves substitutions and anchor aliases, groups footnotes.
  ## Takes input node `n` and returns the same node with recursive
  ## substitutions in `n.sons` to `result`.
  ##
  ## Per node kind:
  ## * `rnSubstitutionReferences` is replaced by the defined substitution or,
  ##   failing that, by the value of the environment variable of the same
  ##   name; if neither exists, `mwUnknownSubstitution` is reported.
  ## * `rnRstRef` / `rnPandocRef` are resolved by `resolveLink`.
  ## * `rnFootnote` gets its final number/symbol label.
  ## * `rnFootnoteRef` is rebuilt as a fresh node carrying the final label
  ##   and the target anchor (`mwBrokenLink` if the anchor is unknown).
  ## * Any other non-leaf node has its sons resolved recursively, and runs of
  ##   adjacent footnotes are wrapped into `rnFootnoteGroup` nodes.
  ##
  ## A `nil` input is returned as is.
  result = n
  if n == nil: return
  case n.kind
  of rnSubstitutionReferences:
    let x = findSub(s, n)
    if x >= 0:
      result = s.subs[x].value
    else:
      # no substitution defined: fall back to the process environment
      let key = addNodes(n)
      let e = getEnv(key)
      if e != "": result = newLeaf(e)
      else: rstMessage(s.filenames, s.msgHandler, n.info,
                       mwUnknownSubstitution, key)
  of rnRstRef, rnPandocRef:
    result = resolveLink(s, n)
  of rnFootnote:
    var (fnType, num) = getFootnoteType(s, n.sons[0])
    case fnType
    of fnManualNumber, fnCitation:
      discard "no need to alter fixed text"
    of fnAutoNumberLabel, fnAutoNumber:
      if fnType == fnAutoNumberLabel:
        let labelR = rstnodeToRefname(n.sons[0])
        num = getFootnoteNum(s, labelR)
      else:
        num = getFootnoteNum(s, n.order)
      # replace the label with the number that was finally assigned
      var nn = newRstNode(rnInner)
      nn.add newLeaf($num)
      result.sons[0] = nn
    of fnAutoSymbol:
      let sym = getAutoSymbol(s, n.order)
      n.sons[0].sons[0].text = sym
    # the footnote body may contain references of its own
    n.sons[1] = resolveSubs(s, n.sons[1])
  of rnFootnoteRef:
    var (fnType, num) = getFootnoteType(s, n.sons[0])
    template addLabel(number: int | string) =
      var nn = newRstNode(rnInner)
      nn.add newLeaf($number)
      result.add(nn)
    var refn = fnType.prefix
    # create new rnFootnoteRef, add final label, and finalize target refn:
    result = newRstNode(rnFootnoteRef, info = n.info)
    case fnType
    of fnManualNumber:
      addLabel num
      refn.add $num
    of fnAutoNumber:
      inc s.currFootnoteNumRef
      addLabel getFootnoteNum(s, s.currFootnoteNumRef)
      refn.add $s.currFootnoteNumRef
    of fnAutoNumberLabel:
      addLabel getFootnoteNum(s, rstnodeToRefname(n))
      refn.add rstnodeToRefname(n)
    of fnAutoSymbol:
      inc s.currFootnoteSymRef
      addLabel getAutoSymbol(s, s.currFootnoteSymRef)
      refn.add $s.currFootnoteSymRef
    of fnCitation:
      result.add n.sons[0]
      refn.add rstnodeToRefname(n)
    # TODO: correctly report ambiguities
    let anchorInfo = findMainAnchorRst(s, refn, n.info)
    if anchorInfo.len != 0:
      result.add newLeaf(anchorInfo[0].target.anchor)  # add link
    else:
      rstMessage(s.filenames, s.msgHandler, n.info, mwBrokenLink, refn)
      result.add newLeaf(refn)  # add link
  of rnLeaf:
    discard
  else:
    var regroup = false
    for i in 0 ..< n.len:
      n.sons[i] = resolveSubs(s, n.sons[i])
      if n.sons[i] != nil and n.sons[i].kind == rnFootnote:
        regroup = true
    if regroup:  # group footnotes together into rnFootnoteGroup
      var newSons: seq[PRstNode]
      var i = 0
      while i < n.len:
        if n.sons[i] != nil and n.sons[i].kind == rnFootnote:
          var grp = newRstNode(rnFootnoteGroup)
          # A nil son ends the run: it must not be dereferenced here and is
          # copied over unchanged by the `else` branch on the next iteration.
          while i < n.len and n.sons[i] != nil and
              n.sons[i].kind == rnFootnote:
            grp.sons.add n.sons[i]
            inc i
          newSons.add grp
        else:
          newSons.add n.sons[i]
          inc i
      result.sons = newSons
|
|
|
|
proc completePass2*(s: PRstSharedState) =
  ## Finishes pass 2: reports `mwUnusedImportdoc` for every entry of
  ## `s.idxImports` whose `used` flag was never set.
  for importedFile, importInfo in pairs(s.idxImports):
    if importInfo.used:
      continue
    rstMessage(s.filenames, s.msgHandler, importInfo.fromInfo,
               mwUnusedImportdoc, importedFile)
|
|
|
|
proc rstParse*(text, filename: string,
               line, column: int,
               options: RstParseOptions,
               findFile: FindFileHandler = nil,
               findRefFile: FindRefFileHandler = nil,
               msgHandler: MsgHandler = nil):
              tuple[node: PRstNode, filenames: RstFileTable, hasToc: bool] =
  ## Parses the whole `text`. The result is ready for `rstgen.renderRstToOut`,
  ## note that 2nd tuple element should be fed to `initRstGenerator`
  ## argument `filenames` (it is being filled here at least with `filename`
  ## and possibly with other files from RST ``.. include::`` statement).
  ##
  ## Runs both passes in sequence: `rstParsePass1`, `preparePass2`,
  ## `resolveSubs` and finally `completePass2`.
  var state = newRstSharedState(options, filename, findFile, findRefFile,
                                msgHandler, hasToc=false)
  let pass1Tree = rstParsePass1(text, line, column, state)
  preparePass2(state, pass1Tree)
  let resolved = resolveSubs(state, pass1Tree)
  completePass2(state)
  result = (node: resolved, filenames: state.filenames, hasToc: state.hasToc)
|