mirror of
https://github.com/nim-lang/Nim.git
synced 2026-01-04 20:17:42 +00:00
@@ -1,49 +0,0 @@
|
||||
#
|
||||
#
|
||||
# The Nimrod Compiler
|
||||
# (c) Copyright 2012 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
# Character constants shared by the lexers.
const
  CharSize* = sizeof(char)    # size of one character in bytes
  Lrz* = ' '                  # the space character
  Apo* = '\''                 # the apostrophe / single quote
  Tabulator* = '\x09'         # horizontal tab
  ESC* = '\x1B'               # escape
  CR* = '\x0D'                # carriage return
  FF* = '\x0C'                # form feed
  LF* = '\x0A'                # line feed
  BEL* = '\x07'               # bell
  BACKSPACE* = '\x08'         # backspace
  VT* = '\x0B'                # vertical tab
|
||||
|
||||
# Platform-dependent separators and newline conventions.
#
# NOTE(review): these lines used to be bare `==` comparisons against
# undeclared names, so they defined nothing. They are rewritten as constant
# definitions. The names are not exported because the original lines carried
# no `*`; add it if other modules are meant to see them.
# NOTE(review): the newline constant had the string literal "\n" where a name
# is required; `NL` is presumably the intended name -- confirm.
when defined(macos):
  const
    DirSep = ':'
    NL = CR & ""              # newline sequence as a string
    FirstNLchar = CR          # first character of the newline sequence
    PathSep = ';'             # XXX: is this correct?
elif defined(unix):
  const
    DirSep = '/'
    NL = LF & ""
    FirstNLchar = LF
    PathSep = ':'
else:
  # windows, dos
  const
    DirSep = '\\'
    NL = CR & LF              # was `CR + LF`; `+` is not defined for chars
    FirstNLchar = CR
    DriveSeparator = ':'
    PathSep = ';'

# Character classes. The definitions do not depend on the platform, so they
# are placed outside the `when`.
# NOTE(review): the original nesting was lost -- confirm.
const
  UpLetters = {'A'..'Z', '\xC0'..'\xDE'}
  DownLetters = {'a'..'z', '\xDF'..'\xFF'}
  Numbers = {'0'..'9'}
  Letters = UpLetters + DownLetters
|
||||
type
  TCharSet* = set[char]       # a set of characters
  PCharSet* = ref TCharSet    # reference to a character set
|
||||
|
||||
# implementation
|
||||
@@ -1,346 +0,0 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2012 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
# A HIGH-PERFORMANCE configuration file parser;
|
||||
# the Nimrod version of this file is part of the
|
||||
# standard library.
|
||||
|
||||
import
|
||||
llstream, nhashes, strutils, nimlexbase
|
||||
|
||||
type
  TCfgEventKind* = enum       ## kinds of events returned by `next`
    cfgEof,                   ## end of file reached
    cfgSectionStart,          ## a ``[section]`` has been parsed
    cfgKeyValuePair,          ## a ``key=value`` pair has been detected
    cfgOption,                ## a ``--key=value`` command line option
    cfgError                  ## an error occurred during parsing; `msg`
                              ## contains the error message

  TCfgEvent* = object of TObject ## one parsing event; the fields that are
                                 ## available depend on `kind`
    case kind*: TCfgEventKind
    of cfgEof:
      nil
    of cfgSectionStart:
      section*: string        ## the symbol parsed between ``[`` and ``]``
    of cfgKeyValuePair, cfgOption:
      key*, value*: string    ## the parsed key and its value; `value` is ""
                              ## when no ``=`` or ``:`` part follows the key
    of cfgError:
      msg*: string            ## message built by `errorStr`

  TTokKind* = enum            ## kinds of tokens produced by the tokenizer
    tkInvalid, tkEof,         # order is important here!
    tkSymbol, tkEquals, tkColon, tkBracketLe, tkBracketRi, tkDashDash

  TToken*{.final.} = object   ## a token
    kind*: TTokKind           ## the type of the token
    literal*: string          ## the parsed (string) literal

  TParserState* = enum        ## parser states; the code visible here only
    startState, commaState    ## ever assigns `startState`

  TCfgParser* = object of TBaseLexer ## the parser object
    tok*: TToken              ## the current (look-ahead) token
    state*: TParserState
    filename*: string         ## file name used in error messages
|
||||
|
||||
|
||||
# Exported interface of the parser. These are forward declarations only; the
# bodies follow in the implementation part.

# Prepares `c` for reading from `inputStream`; `filename` is stored in `c`.
proc Open*(c: var TCfgParser, filename: string, inputStream: PLLStream)

# Closes the lexer underlying `c`.
proc Close*(c: var TCfgParser)

# Returns the next parsing event.
proc next*(c: var TCfgParser): TCfgEvent

# Returns the column of the current buffer position.
proc getColumn*(c: TCfgParser): int

# Returns the current line number.
proc getLine*(c: TCfgParser): int

# Returns the file name that was passed to `Open`.
proc getFilename*(c: TCfgParser): string

# Returns `msg` formatted as an error message that includes the file name,
# line and column.
proc errorStr*(c: TCfgParser, msg: string): string
|
||||
# implementation
|
||||
|
||||
const
  # Characters that may continue a symbol: ASCII letters, digits, the
  # underscore and every character from '\x80' upwards.
  SymChars: TCharSet = {'0'..'9', 'A'..'Z', '_', 'a'..'z', '\x80'..'\xFF'}
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
# Forward declaration: the procs below call the tokenizer before its body.
proc rawGetTok(c: var TCfgParser, tok: var TToken)
|
||||
proc Open(c: var TCfgParser, filename: string, inputStream: PLLStream) =
  ## Initializes `c` for reading from `inputStream` and reads the first token
  ## into `c.tok`. `filename` is only stored; `errorStr` and `getFilename`
  ## read it back.
  ##
  ## The name is spelled exactly like the exported forward declaration
  ## `Open*`, so the two also match when the first letter of an identifier
  ## is compared case-sensitively.
  openBaseLexer(c, inputStream)
  c.filename = filename
  c.state = startState
  c.tok.kind = tkInvalid
  c.tok.literal = ""
  rawGetTok(c, c.tok)
|
||||
|
||||
proc Close(c: var TCfgParser) =
  ## Closes the base lexer of `c`.
  ##
  ## The name is spelled exactly like the exported forward declaration
  ## `Close*`, so the two also match when the first letter of an identifier
  ## is compared case-sensitively.
  closeBaseLexer(c)
|
||||
|
||||
proc getColumn(c: TCfgParser): int =
  ## Column of the current buffer position, as computed by `getColNumber`.
  return getColNumber(c, c.bufPos)
|
||||
|
||||
proc getLine(c: TCfgParser): int =
  ## Line number currently stored in the lexer.
  return c.linenumber
|
||||
|
||||
proc getFilename(c: TCfgParser): string =
  ## File name stored in the parser.
  return c.filename
|
||||
|
||||
proc handleHexChar(c: var TCfgParser, xi: var int) =
  ## If the character at `c.bufpos` is a hexadecimal digit, shifts `xi` left
  ## by four bits, ors the digit's value in and advances `c.bufpos`.
  ## For any other character nothing is changed.
  var ch = c.buf[c.bufpos]
  var nibble = -1                       # -1 means "not a hex digit"
  if ch in {'0'..'9'}:
    nibble = ord(ch) - ord('0')
  elif ch in {'a'..'f'}:
    nibble = ord(ch) - ord('a') + 10
  elif ch in {'A'..'F'}:
    nibble = ord(ch) - ord('A') + 10
  if nibble >= 0:
    xi = (xi shl 4) or nibble
    inc(c.bufpos)
|
||||
|
||||
proc handleDecChars(c: var TCfgParser, xi: var int) =
  ## Consumes the run of decimal digits at `c.bufpos` and accumulates its
  ## value into `xi`.
  ##
  ## Accumulation stops as soon as `xi` exceeds 255, although the remaining
  ## digits are still consumed. This keeps an arbitrarily long digit run from
  ## overflowing `xi`. Values up to 255 are computed exactly as before, and
  ## larger ones still come out greater than 255, which is all the caller
  ## `getEscapedChar` tests for.
  while c.buf[c.bufpos] in {'0'..'9'}:
    if xi <= 255:
      xi = (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0'))
    inc(c.bufpos)
|
||||
|
||||
proc getEscapedChar(c: var TCfgParser, tok: var TToken) =
  ## Decodes the escape sequence whose backslash is at `c.bufpos` and appends
  ## the result to `tok.literal`. The escape letters are accepted in both
  ## upper and lower case. An unknown escape, or a decimal escape greater
  ## than 255, sets `tok.kind` to `tkInvalid`.
  inc(c.bufpos)                         # skip '\'
  var ch = c.buf[c.bufpos]
  var code = 0
  var repl = '\0'
  var oneToOne = true                   # escape maps to exactly one char
  case ch
  of 'r', 'R', 'c', 'C': repl = CR
  of 'l', 'L': repl = LF
  of 'f', 'F': repl = FF
  of 'e', 'E': repl = ESC
  of 'a', 'A': repl = BEL
  of 'b', 'B': repl = BACKSPACE
  of 'v', 'V': repl = VT
  of 't', 'T': repl = Tabulator
  of '\'', '\"', '\\': repl = ch        # quotes and backslash stand for themselves
  else: oneToOne = false
  if oneToOne:
    add(tok.literal, repl)
    inc(c.bufpos)
  elif ch in {'n', 'N'}:
    add(tok.literal, "\n")
    inc(c.bufpos)
  elif ch in {'x', 'X'}:
    # up to two hexadecimal digits
    inc(c.bufpos)
    handleHexChar(c, code)
    handleHexChar(c, code)
    add(tok.literal, chr(code))
  elif ch in {'0'..'9'}:
    # decimal character code
    handleDecChars(c, code)
    if code > 255: tok.kind = tkInvalid
    else: add(tok.literal, chr(code))
  else:
    tok.kind = tkInvalid
|
||||
|
||||
proc HandleCRLF(c: var TCfgParser, pos: int): int =
  ## If the character at `pos` is CR or LF, passes it to the base lexer's
  ## handler and returns the position that handler reports. Otherwise returns
  ## `pos` unchanged.
  ##
  ## The handlers are qualified with `nimlexbase`, the module this file
  ## imports; the former `lexbase` qualifier named a module that is not
  ## imported here.
  case c.buf[pos]
  of CR: result = nimlexbase.HandleCR(c, pos)
  of LF: result = nimlexbase.HandleLF(c, pos)
  else: result = pos
|
||||
|
||||
proc getString(c: var TCfgParser, tok: var TToken, rawMode: bool) =
  ## Scans a string literal whose opening quote is at `c.bufPos` and appends
  ## its contents to `tok.literal`. `tok.kind` becomes `tkSymbol`, or
  ## `tkInvalid` if the literal is not terminated.
  ##
  ## * A literal that opens with three quotes may span lines. Each line break
  ##   is stored as "\n", a newline directly after the opening quotes is
  ##   dropped, and escape sequences are never interpreted.
  ## * An ordinary literal ends at the next quote. Backslash escapes are
  ##   decoded unless `rawMode` is true. A line break or end of file inside
  ##   it makes the token invalid.
  ##
  ## For an unterminated triple-quoted literal `c.bufpos` is left on the
  ## end-of-file marker. It used to be moved three characters past it, so the
  ## following token was read from beyond the end of the input.
  var pos = c.bufPos + 1                # skip "
  var buf = c.buf                       # put `buf` in a register
  tok.kind = tkSymbol
  if (buf[pos] == '\"') and (buf[pos + 1] == '\"'):
    # long string literal:
    inc(pos, 2)                         # skip ""
    # skip leading newline:
    pos = HandleCRLF(c, pos)
    buf = c.buf
    while true:
      case buf[pos]
      of '\"':
        if (buf[pos + 1] == '\"') and (buf[pos + 2] == '\"'):
          inc(pos, 3)                   # skip the three closing """
          break
        add(tok.literal, '\"')
        inc(pos)
      of CR, LF:
        pos = HandleCRLF(c, pos)
        buf = c.buf
        add(tok.literal, "\n")
      of nimlexbase.EndOfFile:
        tok.kind = tkInvalid
        break                           # stay on the end-of-file marker
      else:
        add(tok.literal, buf[pos])
        inc(pos)
  else:
    # ordinary string literal
    while true:
      var ch = buf[pos]
      if ch == '\"':
        inc(pos)                        # skip '"'
        break
      if ch in {CR, LF, nimlexbase.EndOfFile}:
        tok.kind = tkInvalid
        break
      if (ch == '\\') and not rawMode:
        # getEscapedChar works on c.bufPos, so synchronize around the call
        c.bufPos = pos
        getEscapedChar(c, tok)
        pos = c.bufPos
      else:
        add(tok.literal, ch)
        inc(pos)
  c.bufpos = pos
|
||||
|
||||
proc getSymbol(c: var TCfgParser, tok: var TToken) =
  ## Appends the character at `c.bufpos` and every following character that
  ## is in `SymChars` to `tok.literal`, then sets `tok.kind` to `tkSymbol`.
  ## The first character is taken unconditionally.
  var buf = c.buf
  var pos = c.bufpos
  add(tok.literal, buf[pos])
  inc(pos)
  while buf[pos] in SymChars:
    add(tok.literal, buf[pos])
    inc(pos)
  tok.kind = tkSymbol
  c.bufpos = pos
|
||||
|
||||
proc skip(c: var TCfgParser) =
  ## Advances `c.bufpos` past spaces, tabs, line breaks and comments. A
  ## comment starts with '#' or ';' and runs to the end of the line.
  ##
  ## The end-of-file constant is qualified with `nimlexbase`, the module this
  ## file imports; the former `lexbase` qualifier named a module that is not
  ## imported here.
  var pos = c.bufpos
  var buf = c.buf
  while true:
    case buf[pos]
    of ' ', Tabulator:
      inc(pos)
    of '#', ';':
      while not (buf[pos] in {CR, LF, nimlexbase.EndOfFile}): inc(pos)
    of CR, LF:
      pos = HandleCRLF(c, pos)
      buf = c.buf
    else:
      break                             # EndOfFile also leaves the loop
  c.bufpos = pos
|
||||
|
||||
proc rawGetTok(c: var TCfgParser, tok: var TToken) =
  ## Reads the next token into `tok`. Leading whitespace and comments are
  ## skipped first. At the end of the input `tok.kind` is `tkEof` and
  ## `tok.literal` is empty.
  ##
  ## The end-of-file constant is qualified with `nimlexbase`, the module this
  ## file imports; the former `lexbase` qualifier named a module that is not
  ## imported here.
  tok.kind = tkInvalid
  setLen(tok.literal, 0)
  skip(c)
  case c.buf[c.bufpos]
  of '=':
    tok.kind = tkEquals
    inc(c.bufpos)
    tok.literal = "="
  of '-':
    # a single '-' is accepted as well and reported like "--"
    inc(c.bufPos)
    if c.buf[c.bufPos] == '-': inc(c.bufPos)
    tok.kind = tkDashDash
    tok.literal = "--"
  of ':':
    tok.kind = tkColon
    inc(c.bufpos)
    tok.literal = ":"
  of 'r', 'R':
    # r"..." is a raw string; any other 'r' starts an ordinary symbol
    if c.buf[c.bufPos + 1] == '\"':
      inc(c.bufPos)
      getString(c, tok, true)
    else:
      getSymbol(c, tok)
  of '[':
    tok.kind = tkBracketLe
    inc(c.bufpos)
    tok.literal = "["
  of ']':
    tok.kind = tkBracketRi
    inc(c.bufpos)
    tok.literal = "]"
  of '\"':
    getString(c, tok, false)
  of nimlexbase.EndOfFile:
    tok.kind = tkEof
  else: getSymbol(c, tok)
|
||||
|
||||
proc errorStr(c: TCfgParser, msg: string): string =
  ## Formats `msg` as ``filename(line, column) Error: msg`` using the
  ## parser's file name and current position.
  var lineStr = $(getLine(c))
  var colStr = $(getColumn(c))
  result = "$1($2, $3) Error: $4" % [c.filename, lineStr, colStr, msg]
|
||||
|
||||
proc getKeyValPair(c: var TCfgParser, kind: TCfgEventKind): TCfgEvent =
  ## Parses ``key``, ``key=value`` or ``key:value`` starting at the current
  ## token and returns it as an event of the given `kind`. A key may consist
  ## of several symbols joined by ".". If a symbol is missing where one is
  ## required, the event's kind is `cfgError` and `msg` describes the
  ## problem.
  if c.tok.kind != tkSymbol:
    # no key at all: report the error and consume the offending token
    result.kind = cfgError
    result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal)
    rawGetTok(c, c.tok)
    return
  result.kind = kind
  result.key = c.tok.literal
  result.value = ""
  rawGetTok(c, c.tok)
  # dotted key: sym.sym.sym
  while c.tok.literal == ".":
    add(result.key, '.')
    rawGetTok(c, c.tok)
    if c.tok.kind != tkSymbol:
      result.kind = cfgError
      result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal)
      break
    add(result.key, c.tok.literal)
    rawGetTok(c, c.tok)
  # optional value part
  if c.tok.kind in {tkEquals, tkColon}:
    rawGetTok(c, c.tok)
    if c.tok.kind != tkSymbol:
      result.kind = cfgError
      result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal)
    else:
      result.value = c.tok.literal
    rawGetTok(c, c.tok)
|
||||
|
||||
proc next(c: var TCfgParser): TCfgEvent =
  ## Returns the event that starts at the current token and advances the
  ## parser past it. At the end of the input an event of kind `cfgEof` is
  ## returned and no further token is read.
  case c.tok.kind
  of tkEof:
    result.kind = cfgEof
  of tkSymbol:
    result = getKeyValPair(c, cfgKeyValuePair)
  of tkDashDash:
    rawGetTok(c, c.tok)                 # skip the dashes
    result = getKeyValPair(c, cfgOption)
  of tkBracketLe:
    # section header: '[' symbol ']'
    rawGetTok(c, c.tok)
    if c.tok.kind != tkSymbol:
      result.kind = cfgError
      result.msg = errorStr(c, "symbol expected, but found: " & c.tok.literal)
    else:
      result.kind = cfgSectionStart
      result.section = c.tok.literal
    rawGetTok(c, c.tok)
    if c.tok.kind != tkBracketRi:
      result.kind = cfgError
      result.msg = errorStr(c, "\']\' expected, but found: " & c.tok.literal)
    else:
      rawGetTok(c, c.tok)
  of tkInvalid, tkBracketRi, tkEquals, tkColon:
    # these tokens cannot start an event
    result.kind = cfgError
    result.msg = errorStr(c, "invalid token: " & c.tok.literal)
    rawGetTok(c, c.tok)
|
||||
@@ -1,18 +0,0 @@
|
||||
#
|
||||
#
|
||||
# The Nimrod Compiler
|
||||
# (c) Copyright 2012 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
import
|
||||
llstream, lexer, parser, idents, strutils, ast, msgs
|
||||
|
||||
proc ParseAll*(p: var TParser): PNode =
  ## Stub: does not touch `p` and always returns nil.
  return nil
|
||||
|
||||
proc parseTopLevelStmt*(p: var TParser): PNode =
  ## Stub: does not touch `p` and always returns nil.
  return nil
|
||||
|
||||
@@ -62,7 +62,6 @@ proc parseAll(p: var TParsers): PNode =
|
||||
of skinEndX:
|
||||
internalError("parser to implement")
|
||||
result = ast.emptyNode
|
||||
# skinEndX: result := pendx.parseAll(p.parser);
|
||||
|
||||
proc parseTopLevelStmt(p: var TParsers): PNode =
|
||||
case p.skin
|
||||
@@ -73,7 +72,6 @@ proc parseTopLevelStmt(p: var TParsers): PNode =
|
||||
of skinEndX:
|
||||
internalError("parser to implement")
|
||||
result = ast.emptyNode
|
||||
#skinEndX: result := pendx.parseTopLevelStmt(p.parser);
|
||||
|
||||
proc utf8Bom(s: string): int =
|
||||
if (s[0] == '\xEF') and (s[1] == '\xBB') and (s[2] == '\xBF'):
|
||||
|
||||
Reference in New Issue
Block a user