c2nim: better parsing of #ifdef C2NIM; #def support

This commit is contained in:
Andreas Rumpf
2010-07-22 19:18:26 +02:00
parent 2b21285144
commit be878a5993
8 changed files with 407 additions and 101 deletions

View File

@@ -75,9 +75,11 @@ C file lists the OS, CPU and CC the file has been compiled for.
==============
Nimrod supports the generation of DLLs. However, there must be only one
instance of the GC per address space. This instance is contained in
instance of the GC per process/address space. This instance is contained in
``nimrtl.dll``. This means that every generated Nimrod DLL depends
on ``nimrtl.dll``.
on ``nimrtl.dll``. To generate the "nimrtl.dll" file, use the command::
nimrod c -d:release lib/nimrtl.nim
Additional Features

View File

@@ -1,7 +1,7 @@
#
#
# Nimrod's Runtime Library
# (c) Copyright 2009 Andreas Rumpf
# (c) Copyright 2010 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
@@ -187,6 +187,60 @@ elif defined(nogc):
dest^ = src
include "system/cellsets"
elif appType == "lib":
{.warning: "gc in a library context may not work".}
when hostOS == "windows":
const nimrtl = "nimrtl.dll"
elif hostOS == "macosx":
const nimrtl = "nimrtl.dynlib"
else:
const nimrtl = "libnimrtl.so"
when not defined(includeGC):
# ordinary client; use the GC from nimrtl.dll:
proc initGC() {.cdecl, importc, dynlib: nimrtl.}
proc GC_disable() {.cdecl, importc, dynlib: nimrtl.}
proc GC_enable() {.cdecl, importc, dynlib: nimrtl.}
proc GC_fullCollect() {.cdecl, importc, dynlib: nimrtl.}
proc GC_setStrategy(strategy: TGC_Strategy) {.
cdecl, importc, dynlib: nimrtl.}
proc GC_enableMarkAndSweep() {.cdecl, importc, dynlib: nimrtl.}
proc GC_disableMarkAndSweep() {.cdecl, importc, dynlib: nimrtl.}
proc GC_getStatistics(): string {.cdecl, importc, dynlib: nimrtl.}
proc newObj(typ: PNimType, size: int): pointer {.
compilerproc, cdecl, importc, dynlib: nimrtl.}
proc newSeq(typ: PNimType, len: int): pointer {.
compilerproc, cdecl, importc, dynlib: nimrtl.}
proc growObj(old: pointer, newsize: int): pointer {.
cdecl, importc, dynlib: nimrtl.}
proc setStackBottom(theStackBottom: pointer) {.
compilerproc, cdecl, importc, dynlib: nimrtl.}
proc nimGCref(p: pointer) {.
compilerproc, cdecl, importc, dynlib: nimrtl.}
proc nimGCunref(p: pointer) {.
compilerproc, cdecl, importc, dynlib: nimrtl.}
# The write barrier is performance critical!
# XXX We should ensure that they are inlined here.
# Later implementations will do this.
proc unsureAsgnRef(dest: ppointer, src: pointer) {.
compilerproc, cdecl, importc, dynlib: nimrtl.}
proc asgnRef(dest: ppointer, src: pointer) {.
compilerproc, cdecl, importc, dynlib: nimrtl.}
proc asgnRefNoCycle(dest: ppointer, src: pointer) {.
compilerproc, cdecl, importc, dynlib: nimrtl.}
else:
# include the GC and export it!
include "system/alloc"
include "system/cellsets"
assert(sizeof(TCell) == sizeof(TFreeCell))
include "system/gc"
include "system/cellsets"
else:
include "system/alloc"
include "system/cellsets"
@@ -195,4 +249,3 @@ else:
{.pop.}

View File

@@ -28,7 +28,6 @@ Options:
(multiple --prefix options are supported)
--suffix:SUFFIX strip suffix for the generated Nimrod identifiers
(multiple --suffix options are supported)
--skip:IDENT skip IDENT in the input file
-v, --version write c2nim's version
-h, --help show this help
"""

View File

@@ -24,6 +24,7 @@ const
type
TTokKind* = enum
pxInvalid, pxEof,
pxMacroParam, # fake token: macro parameter (with its index)
pxStarComment, # /* */ comment
pxLineComment, # // comment
pxDirective, # #define, etc.
@@ -78,10 +79,9 @@ type
pxIntLit,
pxInt64Lit, # long constant like 0x70fffffff or out of int range
pxFloatLit,
pxParLe, pxParRi,
pxBracketLe, pxBracketRi,
pxParLe, pxBracketLe, pxCurlyLe, # this order is important
pxParRi, pxBracketRi, pxCurlyRi, # for macro argument parsing!
pxComma, pxSemiColon, pxColon,
pxCurlyLe, pxCurlyRi
TTokKinds* = set[TTokKind]
type
@@ -89,7 +89,8 @@ type
TToken* = object
xkind*: TTokKind # the type of the token
s*: string # parsed symbol, char or string literal
iNumber*: BiggestInt # the parsed integer literal
iNumber*: BiggestInt # the parsed integer literal;
# if xkind == pxMacroParam: parameter's position
fNumber*: BiggestFloat # the parsed floating point literal
base*: TNumericalBase # the numerical base; only valid for int
# or float literals
@@ -99,7 +100,6 @@ type
filename*: string
inDirective: bool
proc getTok*(L: var TLexer, tok: var TToken)
proc PrintTok*(tok: TToken)
proc `$`*(tok: TToken): string
@@ -140,6 +140,7 @@ proc TokKindToStr*(k: TTokKind): string =
case k
of pxEof: result = "[EOF]"
of pxInvalid: result = "[invalid]"
of pxMacroParam: result = "[macro param]"
of pxStarComment, pxLineComment: result = "[comment]"
of pxStrLit: result = "[string literal]"
of pxCharLit: result = "[char literal]"

View File

@@ -7,9 +7,9 @@
# distribution, for details about the copyright.
#
# This module implements an Ansi C parser.
# It transfers a C source file into a Nimrod AST. Then the renderer can be
# used to convert the AST to its text representation.
## This module implements an Ansi C parser.
## It translates a C source file into a Nimrod AST. Then the renderer can be
## used to convert the AST to its text representation.
# XXX standalone structs and unions!
# XXX header pragma for struct and union fields!
@@ -20,16 +20,22 @@ import
options, strtabs
type
TParserFlag* = enum
TParserFlag = enum
pfRefs, ## use "ref" instead of "ptr" for C's typ*
pfCDecl, ## annotate procs with cdecl
pfStdCall ## annotate procs with stdcall
TMacro {.final.} = object
name: string
params: int # number of parameters
body: seq[ref TToken] # can contain pxMacroParam tokens
TParserOptions {.final.} = object
flags: set[TParserFlag]
prefixes, suffixes, skipWords: seq[string]
prefixes, suffixes: seq[string]
mangleRules: seq[tuple[pattern: TPeg, frmt: string]]
dynlibSym, header: string
macros: seq[TMacro]
PParserOptions* = ref TParserOptions
TParser* {.final.} = object
@@ -46,7 +52,7 @@ proc newParserOptions*(): PParserOptions =
new(result)
result.prefixes = @[]
result.suffixes = @[]
result.skipWords = @[]
result.macros = @[]
result.mangleRules = @[]
result.flags = {}
result.dynlibSym = ""
@@ -62,7 +68,6 @@ proc setOption*(parserOptions: PParserOptions, key: string, val=""): bool =
of "stdcall": incl(parserOptions.flags, pfStdCall)
of "prefix": parserOptions.prefixes.add(val)
of "suffix": parserOptions.suffixes.add(val)
of "skip": parserOptions.skipWords.add(val)
else: result = false
proc ParseUnit*(p: var TParser): PNode
@@ -71,7 +76,6 @@ proc openParser*(p: var TParser, filename: string, inputStream: PLLStream,
proc closeParser*(p: var TParser)
proc exSymbol*(n: var PNode)
proc fixRecordDef*(n: var PNode)
# XXX: move these two to an auxiliary module
# implementation
@@ -81,7 +85,11 @@ proc OpenParser(p: var TParser, filename: string,
p.options = options
p.backtrack = @[]
new(p.tok)
proc parMessage(p: TParser, msg: TMsgKind, arg = "") =
#assert false
lexMessage(p.lex, msg, arg)
proc CloseParser(p: var TParser) = CloseLexer(p.lex)
proc safeContext(p: var TParser) = p.backtrack.add(p.tok)
proc closeContext(p: var TParser) = discard p.backtrack.pop()
@@ -102,18 +110,81 @@ proc rawGetTok(p: var TParser) =
p.tok.next = t
p.tok = t
proc isSkipWord(p: TParser): bool =
for s in items(p.options.skipWords):
if p.tok.s == s: return true
# Looks up the current token's text (`p.tok.s`) among the macros registered
# in `p.options.macros`. Returns the index of the first macro whose name
# matches, or -1 if there is none.
proc findMacro(p: TParser): int =
for i in 0..high(p.options.macros):
if p.tok.s == p.options.macros[i].name: return i
return -1
# Consumes the current token if it is of kind `xkind`, advancing with
# rawGetTok (i.e. without triggering macro expansion). Otherwise reports
# errTokenExpected, naming the expected token kind.
proc rawEat(p: var TParser, xkind: TTokKind) =
if p.tok.xkind == xkind: rawGetTok(p)
else: parMessage(p, errTokenExpected, TokKindToStr(xkind))
proc parseMacroArguments(p: var TParser): seq[seq[ref TToken]] =
  ## Collects the tokens of a macro invocation's arguments, one token
  ## sequence per argument. Expects `p.tok` to be the first token after the
  ## opening parenthesis and stops *at* the matching ``)`` without consuming
  ## it. Commas nested inside ``()``, ``[]`` or ``{}`` do not separate
  ## arguments; they are kept as part of the current argument.
  result = @[]
  result.add(@[])
  # nesting depth per bracket kind, indexed by the *opening* token kind:
  var i: array[pxParLe..pxCurlyLe, int]
  var L = 0 # index of the argument currently being collected
  # Saving the context makes every following token a freshly allocated
  # TToken object, so the references stored in `result` stay valid.
  safeContext(p)
  while true:
    var kind = p.tok.xkind
    case kind
    of pxEof:
      # unterminated argument list: report the missing ')' and stop instead
      # of reading past the end of the input
      rawEat(p, pxParRi)
      break
    of pxParLe, pxBracketLe, pxCurlyLe:
      inc(i[kind])
      result[L].add(p.tok)
    of pxParRi:
      # end of arguments?
      if i[pxParLe] == 0: break
      dec(i[pxParLe])
      result[L].add(p.tok)
    of pxBracketRi, pxCurlyRi:
      # map the closing token onto its opening counterpart; this relies on
      # the declaration order of the token kinds in TTokKind
      kind = pred(kind, 3)
      if i[kind] > 0: dec(i[kind])
      result[L].add(p.tok)
    of pxComma:
      if i[pxParLe] == 0 and i[pxBracketLe] == 0 and i[pxCurlyLe] == 0:
        # next argument: comma is not part of the argument. The comma itself
        # is skipped by the `rawGetTok` at the end of the loop; advancing
        # here as well would drop the first token of the next argument.
        result.add(@[])
        inc(L)
      else:
        # comma does not separate different arguments:
        result[L].add(p.tok)
    else:
      result[L].add(p.tok)
    rawGetTok(p)
  closeContext(p)
proc appendTokenCopy(last: var ref TToken, t: ref TToken) =
  ## Appends a fresh copy of `t` after `last` and makes `last` point to the
  ## copy. Copying keeps the stored macro body and argument tokens untouched,
  ## so they can be expanded any number of times.
  var c: ref TToken
  new(c)
  c^ = t^
  last.next = c
  last = c

proc expandMacro(p: var TParser, m: TMacro) =
  ## Expands an invocation of the macro `m`; `p.tok` must be the macro name.
  ## For macros with parameters the parenthesized argument list is parsed
  ## first. The macro body, with each pxMacroParam token replaced by the
  ## tokens of the corresponding argument, is then inserted in front of the
  ## current token. A macro with an empty body simply disappears.
  rawGetTok(p) # skip macro name
  var arguments: seq[seq[ref TToken]]
  if m.params > 0:
    rawEat(p, pxParLe)
    arguments = parseMacroArguments(p)
    if arguments.len != m.params: parMessage(p, errWrongNumberOfArguments)
    rawEat(p, pxParRi)
  # insert into the token list:
  if m.body.len > 0:
    # dummy head node; the expansion starts at `newList.next`
    var newList: ref TToken
    new(newList)
    var lastTok = newList
    for tok in items(m.body):
      if tok.xkind == pxMacroParam:
        # Link copies, not the argument tokens themselves: a parameter that
        # occurs twice in the body would otherwise make a token point at
        # itself and produce an endless token stream.
        for t in items(arguments[int(tok.iNumber)]):
          appendTokenCopy(lastTok, t)
      else:
        # Body tokens are shared by all expansions of `m`; overwriting
        # their `next` field would corrupt earlier expansions.
        appendTokenCopy(lastTok, tok)
    lastTok.next = p.tok
    p.tok = newList.next
proc getTok(p: var TParser) =
while true:
rawGetTok(p)
if p.tok.xkind != pxSymbol or not isSkipWord(p): break
proc parMessage(p: TParser, msg: TMsgKind, arg = "") =
#assert false
lexMessage(p.lex, msg, arg)
rawGetTok(p)
if p.tok.xkind == pxSymbol:
var idx = findMacro(p)
if idx >= 0:
expandMacro(p, p.options.macros[idx])
proc parLineInfo(p: TParser): TLineInfo =
result = getLineInfo(p.lex)

View File

@@ -1,3 +1,12 @@
#
#
# c2nim - C to Nimrod source converter
# (c) Copyright 2010 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
# Preprocessor support
const
@@ -9,6 +18,10 @@ proc eatNewLine(p: var TParser, n: PNode) =
if p.tok.xkind == pxNewLine: getTok(p)
else:
eat(p, pxNewLine)
# Skips the rest of the current line: advances until an EOF, newline or
# line-comment token is reached, then calls eatNewLine to consume the
# line terminator.
proc skipLine(p: var TParser) =
while p.tok.xkind notin {pxEof, pxNewLine, pxLineComment}: getTok(p)
eatNewLine(p, nil)
proc parseDefineBody(p: var TParser, tmplDef: PNode): string =
if p.tok.xkind == pxCurlyLe or
@@ -65,6 +78,57 @@ proc parseDefine(p: var TParser): PNode =
addSon(result, c)
eatNewLine(p, c)
# Reads the replacement text of a ``#def`` macro into `m.body`: all tokens up
# to the end of the line. A symbol whose text equals one of `params` is
# stored as a pxMacroParam token whose `iNumber` is the parameter's index;
# pxDirConc tokens are dropped; every other token is stored as is.
proc parseDefBody(p: var TParser, m: var TMacro, params: seq[string]) =
m.body = @[]
# A little hack: We save the context, so that every following token will be
# put into a newly allocated TToken object. Thus we can just save a
# reference to the token in the macro's body.
safeContext(p)
while p.tok.xkind notin {pxEof, pxNewLine, pxLineComment}:
case p.tok.xkind
of pxSymbol:
# is it a parameter reference?
var tok = p.tok
for i in 0..high(params):
if params[i] == p.tok.s:
# replace the symbol by a fresh placeholder token for parameter `i`
new(tok)
tok.xkind = pxMacroParam
tok.iNumber = i
break
m.body.add(tok)
of pxDirConc:
# just ignore this token: this implements token merging correctly
nil
else:
m.body.add(p.tok)
# we do not want macro expansion here:
rawGetTok(p)
eatNewLine(p, nil)
closeContext(p)
# newline token might be overwritten, but this is not
# part of the macro body, so it is safe.
# Parses a ``#def`` directive and fills in `m`: the macro's name, the number
# of parameters and (via parseDefBody) its body. A parameter list is only
# parsed when the directive token is of kind pxDirectiveParLe.
proc parseDef(p: var TParser, m: var TMacro) =
var hasParams = p.tok.xkind == pxDirectiveParLe
getTok(p)
expectIdent(p)
m.name = p.tok.s
getTok(p)
var params: seq[string] = @[]
# parse parameters:
if hasParams:
eat(p, pxParLe)
while p.tok.xkind != pxParRi:
expectIdent(p)
params.add(p.tok.s)
getTok(p)
skipStarCom(p, nil)
# parameters are separated by commas; anything else ends the list
if p.tok.xkind != pxComma: break
getTok(p)
eat(p, pxParRi)
m.params = params.len
parseDefBody(p, m, params)
proc isDir(p: TParser, dir: string): bool =
result = p.tok.xkind in {pxDirectiveParLe, pxDirective} and p.tok.s == dir
@@ -102,6 +166,12 @@ proc parseStmtList(p: var TParser): PNode =
else: nil
addSon(result, statement(p))
# Expects the current token to be an ``#endif`` directive and skips its
# line; otherwise reports errXExpected for "#endif".
proc eatEndif(p: var TParser) =
if isDir(p, "endif"):
skipLine(p)
else:
parMessage(p, errXExpected, "#endif")
proc parseIfDirAux(p: var TParser, result: PNode) =
addSon(result.sons[0], parseStmtList(p))
while isDir(p, "elif"):
@@ -113,67 +183,133 @@ proc parseIfDirAux(p: var TParser, result: PNode) =
addSon(result, b)
if isDir(p, "else"):
var s = newNodeP(nkElse, p)
while p.tok.xkind notin {pxEof, pxNewLine, pxLineComment}: getTok(p)
eatNewLine(p, nil)
skipLine(p)
addSon(s, parseStmtList(p))
addSon(result, s)
if isDir(p, "endif"):
while p.tok.xkind notin {pxEof, pxNewLine, pxLineComment}: getTok(p)
eatNewLine(p, nil)
else:
parMessage(p, errXExpected, "#endif")
eatEndif(p)
proc specialIf(p: TParser): bool =
ExpectIdent(p)
result = p.tok.s == c2nimSymbol
when false:
proc specialIf(p: TParser): bool =
ExpectIdent(p)
result = p.tok.s == c2nimSymbol
proc chooseBranch(whenStmt: PNode, branch: int): PNode =
var L = sonsLen(whenStmt)
if branch < L:
if L == 2 and whenStmt[1].kind == nkElse or branch == 0:
result = lastSon(whenStmt[branch])
else:
var b = whenStmt[branch]
assert(b.kind == nkElifBranch)
result = newNodeI(nkWhenStmt, whenStmt.info)
for i in branch .. L-1:
addSon(result, whenStmt[i])
proc chooseBranch(whenStmt: PNode, branch: int): PNode =
var L = sonsLen(whenStmt)
if branch < L:
if L == 2 and whenStmt[1].kind == nkElse or branch == 0:
result = lastSon(whenStmt[branch])
else:
var b = whenStmt[branch]
assert(b.kind == nkElifBranch)
result = newNodeI(nkWhenStmt, whenStmt.info)
for i in branch .. L-1:
addSon(result, whenStmt[i])
proc skipIfdefCPlusPlus(p: var TParser): PNode =
proc skipUntilEndif(p: var TParser) =
var nested = 1
while p.tok.xkind != pxEof:
if isDir(p, "endif"):
while p.tok.xkind notin {pxEof, pxNewLine, pxLineComment}: getTok(p)
eatNewLine(p, nil)
return
if isDir(p, "ifdef") or isDir(p, "ifndef") or isDir(p, "if"):
inc(nested)
elif isDir(p, "endif"):
dec(nested)
if nested <= 0:
skipLine(p)
return
getTok(p)
parMessage(p, errXExpected, "#endif")
proc parseIfdefDir(p: var TParser): PNode =
result = newNodeP(nkWhenStmt, p)
addSon(result, newNodeP(nkElifBranch, p))
getTok(p)
var special = specialIf(p)
if p.tok.s == "__cplusplus":
return skipIfdefCPlusPlus(p)
addSon(result.sons[0], definedExprAux(p))
eatNewLine(p, nil)
parseIfDirAux(p, result)
if special:
result = chooseBranch(result, 0)
type
TEndifMarker = enum
emElif, emElse, emEndif
# Skips tokens until the ``#elif``, ``#else`` or ``#endif`` that belongs to
# the conditional currently being skipped, and returns which of the three
# was found. The parser is left positioned *on* that directive token.
# Nested ``#if``/``#ifdef``/``#ifndef`` ... ``#endif`` groups are skipped as
# a whole. Reports errXExpected for "#endif" if EOF is reached first.
proc skipUntilElifElseEndif(p: var TParser): TEndifMarker =
# nesting depth of conditionals; 1 = the conditional we started in
var nested = 1
while p.tok.xkind != pxEof:
if isDir(p, "ifdef") or isDir(p, "ifndef") or isDir(p, "if"):
inc(nested)
elif isDir(p, "elif") and nested <= 1:
return emElif
elif isDir(p, "else") and nested <= 1:
return emElse
elif isDir(p, "endif"):
dec(nested)
if nested <= 0:
return emEndif
getTok(p)
parMessage(p, errXExpected, "#endif")
# Handles an ``#ifdef IDENT`` directive. Three cases:
# * ``__cplusplus``: everything up to the matching ``#endif`` is skipped and
#   no node is assigned to the result.
# * c2nimSymbol: the statements following the directive line are parsed and
#   returned directly (no ``when`` wrapper); whatever follows them up to the
#   matching ``#endif`` is skipped.
# * any other identifier: an nkWhenStmt is built whose first branch carries
#   the defined-expression, and parseIfDirAux parses the branches.
proc parseIfdef(p: var TParser): PNode =
getTok(p) # skip #ifdef
ExpectIdent(p)
case p.tok.s
of "__cplusplus":
skipUntilEndif(p)
of c2nimSymbol:
skipLine(p)
result = parseStmtList(p)
skipUntilEndif(p)
else:
result = newNodeP(nkWhenStmt, p)
addSon(result, newNodeP(nkElifBranch, p))
addSon(result.sons[0], definedExprAux(p))
eatNewLine(p, nil)
parseIfDirAux(p, result)
# Handles an ``#ifndef IDENT`` directive.
# For c2nimSymbol the first branch is skipped without being parsed; what
# happens next depends on the directive that ends it:
# * ``#elif``: an nkWhenStmt is built from the ``#elif`` condition onwards;
# * ``#else``: the else-branch statements are parsed and returned directly,
#   then the ``#endif`` is consumed;
# * ``#endif``: its line is skipped and no node is assigned to the result.
# For any other identifier an nkWhenStmt is built whose first branch has the
# condition ``not <defined-expression>``.
proc parseIfndef(p: var TParser): PNode =
getTok(p) # skip #ifndef
ExpectIdent(p)
if p.tok.s == c2nimSymbol:
skipLine(p)
case skipUntilElifElseEndif(p)
of emElif:
result = newNodeP(nkWhenStmt, p)
addSon(result, newNodeP(nkElifBranch, p))
# step past the ``#elif`` token itself before parsing its condition
getTok(p)
addSon(result.sons[0], expression(p))
eatNewLine(p, nil)
parseIfDirAux(p, result)
of emElse:
skipLine(p)
result = parseStmtList(p)
eatEndif(p)
of emEndif: skipLine(p)
else:
result = newNodeP(nkWhenStmt, p)
addSon(result, newNodeP(nkElifBranch, p))
var e = newNodeP(nkCall, p)
addSon(e, newIdentNodeP("not", p))
addSon(e, definedExprAux(p))
eatNewLine(p, nil)
addSon(result.sons[0], e)
parseIfDirAux(p, result)
when false:
proc parseIfdefDir(p: var TParser): PNode =
result = newNodeP(nkWhenStmt, p)
addSon(result, newNodeP(nkElifBranch, p))
getTok(p)
var special = specialIf(p)
if p.tok.s == "__cplusplus":
return skipIfdefCPlusPlus(p)
addSon(result.sons[0], definedExprAux(p))
eatNewLine(p, nil)
parseIfDirAux(p, result)
if special:
result = chooseBranch(result, 0)
proc parseIfndefDir(p: var TParser): PNode =
result = newNodeP(nkWhenStmt, p)
addSon(result, newNodeP(nkElifBranch, p))
getTok(p)
var special = specialIf(p)
var e = newNodeP(nkCall, p)
addSon(e, newIdentNodeP("not", p))
addSon(e, definedExprAux(p))
eatNewLine(p, nil)
addSon(result.sons[0], e)
parseIfDirAux(p, result)
if special:
result = chooseBranch(result, 1)
proc parseIfndefDir(p: var TParser): PNode =
result = newNodeP(nkWhenStmt, p)
addSon(result, newNodeP(nkElifBranch, p))
getTok(p)
var special = specialIf(p)
var e = newNodeP(nkCall, p)
addSon(e, newIdentNodeP("not", p))
addSon(e, definedExprAux(p))
eatNewLine(p, nil)
addSon(result.sons[0], e)
parseIfDirAux(p, result)
if special:
result = chooseBranch(result, 1)
proc parseIfDir(p: var TParser): PNode =
result = newNodeP(nkWhenStmt, p)
@@ -206,14 +342,14 @@ proc parseDir(p: var TParser): PNode =
case p.tok.s
of "define": result = parseDefine(p)
of "include": result = parseInclude(p)
of "ifdef": result = parseIfdefDir(p)
of "ifndef": result = parseIfndefDir(p)
of "ifdef": result = parseIfdef(p)
of "ifndef": result = parseIfndef(p)
of "if": result = parseIfDir(p)
of "cdecl", "stdcall", "ref":
discard setOption(p.options, p.tok.s)
getTok(p)
eatNewLine(p, nil)
of "dynlib", "header", "prefix", "suffix", "skip":
of "dynlib", "header", "prefix", "suffix":
var key = p.tok.s
getTok(p)
if p.tok.xkind != pxStrLit: ExpectIdent(p)
@@ -222,10 +358,11 @@ proc parseDir(p: var TParser): PNode =
eatNewLine(p, nil)
of "mangle":
parseMangleDir(p)
of "def":
var L = p.options.macros.len
setLen(p.options.macros, L+1)
parseDef(p, p.options.macros[L])
else:
# ignore unimportant/unknown directive ("undef", "pragma", "error")
while true:
getTok(p)
if p.tok.xkind in {pxEof, pxNewLine, pxLineComment}: break
eatNewLine(p, nil)
skipLine(p)

View File

@@ -202,27 +202,31 @@ identifier should be converted:
#mangle "'GTK_'{.*}" "TGtk$1"
``#skip`` directive
-------------------
**Note**: There is also ``--skip`` command line option that can be used for the
same purpose.
``#def`` directive
------------------
Often C code contains special macros that affect the declaration of a function
prototype but confuse c2nim's parser:
.. code-block:: C
// does not parse!
EXPORT int f(void);
EXPORT int g(void);
EXTERN(int) f(void);
EXTERN(int) g(void);
Instead of removing ``EXPORT`` from the input source file, one can tell c2nim
to skip special identifiers:
Instead of removing ``EXTERN()`` from the input source file (which cannot be
done reliably even with a regular expression!), one can tell c2nim
that ``EXTERN`` is a macro that should be expanded by c2nim too:
.. code-block:: C
#skip EXPORT
// does parse now!
EXPORT int f(void);
EXPORT int g(void);
#ifdef C2NIM
# def EXTERN(x) static x
#endif
// parses now!
EXTERN(int) f(void);
EXTERN(int) g(void);
``#def`` is very similar to C's ``#define``.
Limitations

View File

@@ -4,12 +4,43 @@
*/
#ifdef __cplusplus
# ifdef __SOME_OTHER_CRAP
extern "C" {
# endif
#endif
// Test C2NIM skipping:
#ifndef C2NIM
#if someNestedCond
This is an invalid text that should generate a parser error, if not
#endif
skipped correctly.
#endif
#ifndef C2NIM
#if someNestedCond
This is an invalid text that should generate a parser error, if not
#endif
skipped correctly.
#else
typedef char gchar;
typedef unsigned int gunsignedint;
typedef unsigned char guchar;
#endif
#ifdef C2NIM
# mangle "'those'" "these"
int those;
#elif abc
#if someNestedCond
This is an invalid text that should generate a parser error, if not
#else
skipped correctly.
#endif
#else
Another crappy input line.
#endif
point* newPoint(void) {
for (int i = 0; i < 89; ++i) echo("test" " string " "concatenation");
@@ -97,7 +128,15 @@ int IupConvertXYToPos(PIhandle ih, int x, int y);
#endif
#skip EXPORT
#ifdef C2NIM
# def EXTERN(x) static x
#endif
// parses now!
EXTERN(int) f(void);
EXTERN(int) g(void);
#def EXPORT
// does parse now!
EXPORT int f(void);
EXPORT int g(void);