mirror of
https://github.com/nim-lang/Nim.git
synced 2026-06-07 12:24:19 +00:00
425 lines
11 KiB
ObjectPascal
Executable File
425 lines
11 KiB
ObjectPascal
Executable File
//
|
|
//
|
|
// Nimrod's Runtime Library
|
|
// (c) Copyright 2009 Andreas Rumpf
|
|
//
|
|
// See the file "copying.txt", included in this
|
|
// distribution, for details about the copyright.
|
|
//
|
|
unit parsecfg;
|
|
|
|
// A HIGH-PERFORMANCE configuration file parser;
|
|
// the Nimrod version of this file is part of the
|
|
// standard library.
|
|
|
|
interface
|
|
|
|
{$include 'config.inc'}
|
|
|
|
uses
|
|
nsystem, charsets, llstream, sysutils, nhashes, strutils, lexbase;
|
|
|
|
type
|
|
TCfgEventKind = (
|
|
cfgEof, // end of file reached
|
|
cfgSectionStart, // a ``[section]`` has been parsed
|
|
cfgKeyValuePair, // a ``key=value`` pair has been detected
|
|
cfgOption, // a ``--key=value`` command line option
|
|
cfgError // an error ocurred during parsing; msg contains the
|
|
// error message
|
|
);
|
|
TCfgEvent = {@ignore} record
|
|
kind: TCfgEventKind;
|
|
section: string;
|
|
key, value: string;
|
|
msg: string;
|
|
end;
|
|
{@emit object(NObject)
|
|
case kind: TCfgEventKind of
|
|
cfgEof: ();
|
|
cfgSectionStart: (section: string);
|
|
cfgKeyValuePair, cfgOption: (key, value: string);
|
|
cfgError: (msg: string);
|
|
end;}
|
|
TTokKind = (tkInvalid, tkEof, // order is important here!
|
|
tkSymbol, tkEquals, tkColon,
|
|
tkBracketLe, tkBracketRi, tkDashDash
|
|
);
|
|
TToken = record // a token
|
|
kind: TTokKind; // the type of the token
|
|
literal: string; // the parsed (string) literal
|
|
end;
|
|
TParserState = (startState, commaState);
|
|
TCfgParser = object(TBaseLexer)
|
|
tok: TToken;
|
|
state: TParserState;
|
|
filename: string;
|
|
end;
|
|
|
|
procedure Open(var c: TCfgParser; const filename: string;
|
|
inputStream: PLLStream);
|
|
procedure Close(var c: TCfgParser);
|
|
|
|
function next(var c: TCfgParser): TCfgEvent;
|
|
|
|
function getColumn(const c: TCfgParser): int;
|
|
function getLine(const c: TCfgParser): int;
|
|
function getFilename(const c: TCfgParser): string;
|
|
|
|
function errorStr(const c: TCfgParser; const msg: string): string;
|
|
|
|
implementation
|
|
|
|
const
|
|
SymChars: TCharSet = ['a'..'z', 'A'..'Z', '0'..'9', '_', #128..#255];
|
|
|
|
// ----------------------------------------------------------------------------
|
|
procedure rawGetTok(var c: TCfgParser; var tok: TToken); forward;
|
|
|
|
procedure open(var c: TCfgParser; const filename: string;
|
|
inputStream: PLLStream);
|
|
begin
|
|
{@ignore}
|
|
FillChar(c, sizeof(c), 0);
|
|
{@emit}
|
|
openBaseLexer(c, inputStream);
|
|
c.filename := filename;
|
|
c.state := startState;
|
|
c.tok.kind := tkInvalid;
|
|
c.tok.literal := '';
|
|
rawGetTok(c, c.tok);
|
|
end;
|
|
|
|
procedure close(var c: TCfgParser);
|
|
begin
|
|
closeBaseLexer(c);
|
|
end;
|
|
|
|
function getColumn(const c: TCfgParser): int;
|
|
begin
|
|
result := getColNumber(c, c.bufPos)
|
|
end;
|
|
|
|
function getLine(const c: TCfgParser): int;
|
|
begin
|
|
result := c.linenumber
|
|
end;
|
|
|
|
function getFilename(const c: TCfgParser): string;
|
|
begin
|
|
result := c.filename
|
|
end;
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
procedure handleHexChar(var c: TCfgParser; var xi: int);
|
|
begin
|
|
case c.buf[c.bufpos] of
|
|
'0'..'9': begin
|
|
xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('0'));
|
|
inc(c.bufpos);
|
|
end;
|
|
'a'..'f': begin
|
|
xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('a') + 10);
|
|
inc(c.bufpos);
|
|
end;
|
|
'A'..'F': begin
|
|
xi := (xi shl 4) or (ord(c.buf[c.bufpos]) - ord('A') + 10);
|
|
inc(c.bufpos);
|
|
end;
|
|
else begin end // do nothing
|
|
end
|
|
end;
|
|
|
|
procedure handleDecChars(var c: TCfgParser; var xi: int);
|
|
begin
|
|
while c.buf[c.bufpos] in ['0'..'9'] do begin
|
|
xi := (xi * 10) + (ord(c.buf[c.bufpos]) - ord('0'));
|
|
inc(c.bufpos);
|
|
end;
|
|
end;
|
|
|
|
procedure getEscapedChar(var c: TCfgParser; var tok: TToken);
|
|
var
|
|
xi: int;
|
|
begin
|
|
inc(c.bufpos); // skip '\'
|
|
case c.buf[c.bufpos] of
|
|
'n', 'N': begin
|
|
tok.literal := tok.literal +{&} nl;
|
|
Inc(c.bufpos);
|
|
end;
|
|
'r', 'R', 'c', 'C': begin addChar(tok.literal, CR); Inc(c.bufpos); end;
|
|
'l', 'L': begin addChar(tok.literal, LF); Inc(c.bufpos); end;
|
|
'f', 'F': begin addChar(tok.literal, FF); inc(c.bufpos); end;
|
|
'e', 'E': begin addChar(tok.literal, ESC); Inc(c.bufpos); end;
|
|
'a', 'A': begin addChar(tok.literal, BEL); Inc(c.bufpos); end;
|
|
'b', 'B': begin addChar(tok.literal, BACKSPACE); Inc(c.bufpos); end;
|
|
'v', 'V': begin addChar(tok.literal, VT); Inc(c.bufpos); end;
|
|
't', 'T': begin addChar(tok.literal, Tabulator); Inc(c.bufpos); end;
|
|
'''', '"': begin addChar(tok.literal, c.buf[c.bufpos]); Inc(c.bufpos); end;
|
|
'\': begin addChar(tok.literal, '\'); Inc(c.bufpos) end;
|
|
'x', 'X': begin
|
|
inc(c.bufpos);
|
|
xi := 0;
|
|
handleHexChar(c, xi);
|
|
handleHexChar(c, xi);
|
|
addChar(tok.literal, Chr(xi));
|
|
end;
|
|
'0'..'9': begin
|
|
xi := 0;
|
|
handleDecChars(c, xi);
|
|
if (xi <= 255) then
|
|
addChar(tok.literal, Chr(xi))
|
|
else
|
|
tok.kind := tkInvalid
|
|
end
|
|
else tok.kind := tkInvalid
|
|
end
|
|
end;
|
|
|
|
function HandleCRLF(var c: TCfgParser; pos: int): int;
|
|
begin
|
|
case c.buf[pos] of
|
|
CR: result := lexbase.HandleCR(c, pos);
|
|
LF: result := lexbase.HandleLF(c, pos);
|
|
else result := pos
|
|
end
|
|
end;
|
|
|
|
procedure getString(var c: TCfgParser; var tok: TToken; rawMode: Boolean);
|
|
var
|
|
pos: int;
|
|
ch: Char;
|
|
buf: PChar;
|
|
begin
|
|
pos := c.bufPos + 1; // skip "
|
|
buf := c.buf; // put `buf` in a register
|
|
tok.kind := tkSymbol;
|
|
if (buf[pos] = '"') and (buf[pos+1] = '"') then begin
|
|
// long string literal:
|
|
inc(pos, 2); // skip ""
|
|
// skip leading newline:
|
|
pos := HandleCRLF(c, pos);
|
|
buf := c.buf;
|
|
repeat
|
|
case buf[pos] of
|
|
'"': begin
|
|
if (buf[pos+1] = '"') and (buf[pos+2] = '"') then
|
|
break;
|
|
addChar(tok.literal, '"');
|
|
Inc(pos)
|
|
end;
|
|
CR, LF: begin
|
|
pos := HandleCRLF(c, pos);
|
|
buf := c.buf;
|
|
tok.literal := tok.literal +{&} nl;
|
|
end;
|
|
lexbase.EndOfFile: begin
|
|
tok.kind := tkInvalid;
|
|
break
|
|
end
|
|
else begin
|
|
addChar(tok.literal, buf[pos]);
|
|
Inc(pos)
|
|
end
|
|
end
|
|
until false;
|
|
c.bufpos := pos + 3 // skip the three """
|
|
end
|
|
else begin // ordinary string literal
|
|
repeat
|
|
ch := buf[pos];
|
|
if ch = '"' then begin
|
|
inc(pos); // skip '"'
|
|
break
|
|
end;
|
|
if ch in [CR, LF, lexbase.EndOfFile] then begin
|
|
tok.kind := tkInvalid;
|
|
break
|
|
end;
|
|
if (ch = '\') and not rawMode then begin
|
|
c.bufPos := pos;
|
|
getEscapedChar(c, tok);
|
|
pos := c.bufPos;
|
|
end
|
|
else begin
|
|
addChar(tok.literal, ch);
|
|
Inc(pos)
|
|
end
|
|
until false;
|
|
c.bufpos := pos;
|
|
end
|
|
end;
|
|
|
|
procedure getSymbol(var c: TCfgParser; var tok: TToken);
|
|
var
|
|
pos: int;
|
|
buf: pchar;
|
|
begin
|
|
pos := c.bufpos;
|
|
buf := c.buf;
|
|
while true do begin
|
|
addChar(tok.literal, buf[pos]);
|
|
Inc(pos);
|
|
if not (buf[pos] in SymChars) then break;
|
|
end;
|
|
c.bufpos := pos;
|
|
tok.kind := tkSymbol
|
|
end;
|
|
|
|
procedure skip(var c: TCfgParser);
|
|
var
|
|
buf: PChar;
|
|
pos: int;
|
|
begin
|
|
pos := c.bufpos;
|
|
buf := c.buf;
|
|
repeat
|
|
case buf[pos] of
|
|
' ': Inc(pos);
|
|
Tabulator: inc(pos);
|
|
'#', ';': while not (buf[pos] in [CR, LF, lexbase.EndOfFile]) do inc(pos);
|
|
CR, LF: begin
|
|
pos := HandleCRLF(c, pos);
|
|
buf := c.buf;
|
|
end
|
|
else break // EndOfFile also leaves the loop
|
|
end
|
|
until false;
|
|
c.bufpos := pos;
|
|
end;
|
|
|
|
procedure rawGetTok(var c: TCfgParser; var tok: TToken);
|
|
begin
|
|
tok.kind := tkInvalid;
|
|
setLength(tok.literal, 0);
|
|
skip(c);
|
|
case c.buf[c.bufpos] of
|
|
'=': begin
|
|
tok.kind := tkEquals;
|
|
inc(c.bufpos);
|
|
tok.literal := '='+'';
|
|
end;
|
|
'-': begin
|
|
inc(c.bufPos);
|
|
if c.buf[c.bufPos] = '-' then inc(c.bufPos);
|
|
tok.kind := tkDashDash;
|
|
tok.literal := '--';
|
|
end;
|
|
':': begin
|
|
tok.kind := tkColon;
|
|
inc(c.bufpos);
|
|
tok.literal := ':'+'';
|
|
end;
|
|
'r', 'R': begin
|
|
if c.buf[c.bufPos+1] = '"' then begin
|
|
Inc(c.bufPos);
|
|
getString(c, tok, true);
|
|
end
|
|
else
|
|
getSymbol(c, tok);
|
|
end;
|
|
'[': begin
|
|
tok.kind := tkBracketLe;
|
|
inc(c.bufpos);
|
|
tok.literal := '['+'';
|
|
end;
|
|
']': begin
|
|
tok.kind := tkBracketRi;
|
|
Inc(c.bufpos);
|
|
tok.literal := ']'+'';
|
|
end;
|
|
'"': getString(c, tok, false);
|
|
lexbase.EndOfFile: tok.kind := tkEof;
|
|
else getSymbol(c, tok);
|
|
end
|
|
end;
|
|
|
|
function errorStr(const c: TCfgParser; const msg: string): string;
|
|
begin
|
|
result := format('$1($2, $3) Error: $4', [
|
|
c.filename, toString(getLine(c)), toString(getColumn(c)),
|
|
msg
|
|
]);
|
|
end;
|
|
|
|
function getKeyValPair(var c: TCfgParser; kind: TCfgEventKind): TCfgEvent;
|
|
begin
|
|
if c.tok.kind = tkSymbol then begin
|
|
result.kind := kind;
|
|
result.key := c.tok.literal;
|
|
result.value := '';
|
|
rawGetTok(c, c.tok);
|
|
while c.tok.literal = '.'+'' do begin
|
|
addChar(result.key, '.');
|
|
rawGetTok(c, c.tok);
|
|
if c.tok.kind = tkSymbol then begin
|
|
add(result.key, c.tok.literal);
|
|
rawGetTok(c, c.tok);
|
|
end
|
|
else begin
|
|
result.kind := cfgError;
|
|
result.msg := errorStr(c, 'symbol expected, but found: ' +
|
|
c.tok.literal);
|
|
break
|
|
end
|
|
end;
|
|
if c.tok.kind in [tkEquals, tkColon] then begin
|
|
rawGetTok(c, c.tok);
|
|
if c.tok.kind = tkSymbol then begin
|
|
result.value := c.tok.literal;
|
|
end
|
|
else begin
|
|
result.kind := cfgError;
|
|
result.msg := errorStr(c, 'symbol expected, but found: '
|
|
+ c.tok.literal);
|
|
end;
|
|
rawGetTok(c, c.tok);
|
|
end
|
|
end
|
|
else begin
|
|
result.kind := cfgError;
|
|
result.msg := errorStr(c, 'symbol expected, but found: ' + c.tok.literal);
|
|
rawGetTok(c, c.tok);
|
|
end;
|
|
end;
|
|
|
|
function next(var c: TCfgParser): TCfgEvent;
|
|
begin
|
|
case c.tok.kind of
|
|
tkEof: result.kind := cfgEof;
|
|
tkDashDash: begin
|
|
rawGetTok(c, c.tok);
|
|
result := getKeyValPair(c, cfgOption);
|
|
end;
|
|
tkSymbol: begin
|
|
result := getKeyValPair(c, cfgKeyValuePair);
|
|
end;
|
|
tkBracketLe: begin
|
|
rawGetTok(c, c.tok);
|
|
if c.tok.kind = tkSymbol then begin
|
|
result.kind := cfgSectionStart;
|
|
result.section := c.tok.literal;
|
|
end
|
|
else begin
|
|
result.kind := cfgError;
|
|
result.msg := errorStr(c, 'symbol expected, but found: ' + c.tok.literal);
|
|
end;
|
|
rawGetTok(c, c.tok);
|
|
if c.tok.kind = tkBracketRi then rawGetTok(c, c.tok)
|
|
else begin
|
|
result.kind := cfgError;
|
|
result.msg := errorStr(c, ''']'' expected, but found: ' + c.tok.literal);
|
|
end
|
|
end;
|
|
tkInvalid, tkBracketRi, tkEquals, tkColon: begin
|
|
result.kind := cfgError;
|
|
result.msg := errorStr(c, 'invalid token: ' + c.tok.literal);
|
|
rawGetTok(c, c.tok);
|
|
end
|
|
end
|
|
end;
|
|
|
|
end.
|