mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-29 17:34:43 +00:00
Merge branch 'master' of github.com:Araq/Nimrod
This commit is contained in:
@@ -243,11 +243,6 @@ proc LoadConfigs*(cfg: string) =
|
||||
readConfigFile(pd / cfg)
|
||||
|
||||
if gProjectName.len != 0:
|
||||
var conffile = changeFileExt(gProjectFull, "cfg")
|
||||
if conffile != pd / cfg and existsFile(conffile):
|
||||
readConfigFile(conffile)
|
||||
rawMessage(warnConfigDeprecated, conffile)
|
||||
|
||||
# new project wide config file:
|
||||
readConfigFile(changeFileExt(gProjectFull, "nimrod.cfg"))
|
||||
|
||||
|
||||
54
doc/tut1.txt
54
doc/tut1.txt
@@ -189,9 +189,18 @@ to a storage location:
|
||||
var x = "abc" # introduces a new variable `x` and assigns a value to it
|
||||
x = "xyz" # assigns a new value to `x`
|
||||
|
||||
``=`` is the *assignment operator*. The assignment operator cannot
|
||||
be overloaded, overwritten or forbidden, but this might change in a future
|
||||
version of Nimrod.
|
||||
``=`` is the *assignment operator*. The assignment operator cannot be
|
||||
overloaded, overwritten or forbidden, but this might change in a future version
|
||||
of Nimrod. You can declare multiple variables with a single assignment
|
||||
statement and all the variables will have the same value:
|
||||
|
||||
.. code-block::
|
||||
var x, y = 3 # assigns 3 to the variables `x` and `y`
|
||||
echo "x ", x # outputs "x 3"
|
||||
echo "y ", y # outputs "y 3"
|
||||
x = 42 # changes `x` to 42 without changing `y`
|
||||
echo "x ", x # outputs "x 42"
|
||||
echo "y ", y # outputs "y 3"
|
||||
|
||||
|
||||
Constants
|
||||
@@ -1352,6 +1361,45 @@ Even though you don't need to declare a type for a tuple to use it, tuples
|
||||
created with different field names will be considered different objects despite
|
||||
having the same field types.
|
||||
|
||||
Tuples can be *unpacked* during variable assignment (and only then!). This can
|
||||
be handy to assign directly the fields of the tuples to individually named
|
||||
variables. An example of this is the ``splitFile`` proc from the `os module
|
||||
<os.html>`_ which returns the directory, name and extension of a path at the
|
||||
same time. For tuple unpacking to work you have to use parentheses around the
|
||||
values you want to assign the unpacking to, otherwise you will be assigning the
|
||||
same value to all the individual variables! Example:
|
||||
|
||||
.. code-block:: nimrod
|
||||
|
||||
import os
|
||||
|
||||
let
|
||||
path = "usr/local/nimrodc.html"
|
||||
(dir, name, ext) = splitFile(path)
|
||||
baddir, badname, badext = splitFile(path)
|
||||
echo dir # outputs `usr/local`
|
||||
echo name # outputs `nimrodc`
|
||||
echo ext # outputs `.html`
|
||||
# All the following output the same line:
|
||||
# `(dir: usr/local, name: nimrodc, ext: .html)`
|
||||
echo baddir
|
||||
echo badname
|
||||
echo badext
|
||||
|
||||
Tuple unpacking **only** works in ``var`` or ``let`` blocks. The following code
|
||||
won't compile:
|
||||
|
||||
.. code-block:: nimrod
|
||||
|
||||
import os
|
||||
|
||||
var
|
||||
path = "usr/local/nimrodc.html"
|
||||
dir, name, ext = ""
|
||||
|
||||
(dir, name, ext) = splitFile(path)
|
||||
# --> Error: '(dir, name, ext)' cannot be assigned to
|
||||
|
||||
|
||||
Reference and pointer types
|
||||
---------------------------
|
||||
|
||||
267
doc/tut2.txt
267
doc/tut2.txt
@@ -699,15 +699,22 @@ once.
|
||||
Macros
|
||||
======
|
||||
|
||||
Macros enable advanced compile-time code transformations, but they
|
||||
cannot change Nimrod's syntax. However, this is no real restriction because
|
||||
Nimrod's syntax is flexible enough anyway.
|
||||
Macros enable advanced compile-time code transformations, but they cannot
|
||||
change Nimrod's syntax. However, this is no real restriction because Nimrod's
|
||||
syntax is flexible enough anyway. Macros have to be implemented in pure Nimrod
|
||||
code if `foreign function interface (FFI)
|
||||
<manual.html#foreign-function-interface>`_ is not enabled in the compiler, but
|
||||
other than that restriction (which at some point in the future will go away)
|
||||
you can write any kind of Nimrod code and the compiler will run it at compile
|
||||
time.
|
||||
|
||||
To write a macro, one needs to know how the Nimrod concrete syntax is converted
|
||||
to an abstract syntax tree (AST). The AST is documented in the
|
||||
`macros <macros.html>`_ module.
|
||||
There are two ways to write a macro, either *generating* Nimrod source code and
|
||||
letting the compiler parse it, or creating manually an abstract syntax tree
|
||||
(AST) which you feed to the compiler. In order to build the AST one needs to
|
||||
know how the Nimrod concrete syntax is converted to an abstract syntax tree
|
||||
(AST). The AST is documented in the `macros <macros.html>`_ module.
|
||||
|
||||
There are two ways to invoke a macro:
|
||||
Once your macro is finished, there are two ways to invoke it:
|
||||
(1) invoking a macro like a procedure call (`expression macros`:idx:)
|
||||
(2) invoking a macro with the special ``macrostmt``
|
||||
syntax (`statement macros`:idx:)
|
||||
@@ -796,3 +803,249 @@ Term rewriting macros
|
||||
Term rewriting macros can be used to enhance the compilation process
|
||||
with user defined optimizations; see this `document <trmacros.html>`_ for
|
||||
further information.
|
||||
|
||||
|
||||
Building your first macro
|
||||
-------------------------
|
||||
|
||||
To give you a head start on writing macros, we will now show how to turn your typical
|
||||
dynamic code into something that compiles statically. For the exercise we will
|
||||
use the following snippet of code as the starting point:
|
||||
|
||||
.. code-block:: nimrod
|
||||
|
||||
import strutils, tables
|
||||
|
||||
proc readCfgAtRuntime(cfgFilename: string): TTable[string, string] =
|
||||
let
|
||||
inputString = readFile(cfgFilename)
|
||||
var
|
||||
source = ""
|
||||
|
||||
result = initTable[string, string]()
|
||||
for line in inputString.splitLines:
|
||||
# Ignore empty lines
|
||||
if line.len < 1: continue
|
||||
var chunks = split(line, ',')
|
||||
if chunks.len != 2:
|
||||
quit("Input needs comma split values, got: " & line)
|
||||
result[chunks[0]] = chunks[1]
|
||||
|
||||
if result.len < 1: quit("Input file empty!")
|
||||
|
||||
let info = readCfgAtRuntime("data.cfg")
|
||||
|
||||
when isMainModule:
|
||||
echo info["licenseOwner"]
|
||||
echo info["licenseKey"]
|
||||
echo info["version"]
|
||||
|
||||
Presumably this snippet of code could be used in commercial software, reading
|
||||
a configuration file to display information about the person who bought the
|
||||
software. This external file would be generated by an online web shopping cart
|
||||
to be included along with the program containing the license information::
|
||||
|
||||
version,1.1
|
||||
licenseOwner,Hyori Lee
|
||||
licenseKey,M1Tl3PjBWO2CC48m
|
||||
|
||||
The ``readCfgAtRuntime`` proc will open the given filename and return a
|
||||
``TTable`` from the `tables module <tables.html>`_. The parsing of the file is
|
||||
done (without much care for handling invalid data or corner cases) using the
|
||||
``split`` proc from the `strutils module <strutils.html>`_. There are many
|
||||
things which can fail; keep in mind that the purpose is to explain how to make this run at
|
||||
compile time, not how to properly implement a DRM scheme.
|
||||
|
||||
The reimplementation of this code as a compile time proc will allow us to get
|
||||
rid of the ``data.cfg`` file we would need to distribute along with the binary, plus
|
||||
if the information is really constant, it doesn't make sense from a logical point of
|
||||
view to have it *mutable* in a global variable, it would be better if it was a
|
||||
constant. Finally, and likely the most valuable feature, we can implement some
|
||||
verification at compile time. You could think of this as a *better unit
|
||||
testing*, since it is impossible to obtain a binary unless everything is
|
||||
correct, preventing you from shipping to users a broken program which won't start
|
||||
because a small critical file is missing or its contents changed by mistake to
|
||||
something invalid.
|
||||
|
||||
|
||||
Generating source code
|
||||
++++++++++++++++++++++
|
||||
|
||||
Our first attempt will start by modifying the program to generate a compile
|
||||
time string with the *generated source code*, which we then pass to the
|
||||
``parseStmt`` proc from the `macros module <macros.html>`_. Here is the
|
||||
modified source code implementing the macro:
|
||||
|
||||
.. code-block:: nimrod
|
||||
import macros, strutils
|
||||
|
||||
macro readCfgAndBuildSource(cfgFilename: string): stmt =
|
||||
let
|
||||
inputString = slurp(cfgFilename.strVal)
|
||||
var
|
||||
source = ""
|
||||
|
||||
for line in inputString.splitLines:
|
||||
# Ignore empty lines
|
||||
if line.len < 1: continue
|
||||
var chunks = split(line, ',')
|
||||
if chunks.len != 2:
|
||||
error("Input needs comma split values, got: " & line)
|
||||
source &= "const cfg" & chunks[0] & "= \"" & chunks[1] & "\"\n"
|
||||
|
||||
if source.len < 1: error("Input file empty!")
|
||||
result = parseStmt(source)
|
||||
|
||||
readCfgAndBuildSource("data.cfg")
|
||||
|
||||
when isMainModule:
|
||||
echo cfglicenseOwner
|
||||
echo cfglicenseKey
|
||||
echo cfgversion
|
||||
|
||||
The good news is not much has changed! First, we need to change the handling of
|
||||
the input parameter. In the dynamic version the ``readCfgAtRuntime`` proc
|
||||
receives a string parameter. However, in the macro version it is also declared
|
||||
as string, but this is the *outside* interface of the macro. When the macro is
|
||||
run, it actually gets a ``PNimrodNode`` object instead of a string, and we have
|
||||
to call the ``strVal`` proc from the `macros module <macros.html>`_ to obtain
|
||||
the string being passed in to the macro.
|
||||
|
||||
Second, we cannot use the ``readFile`` proc from the `system module
|
||||
<system.html>`_ due to FFI restriction at compile time. If we try to use this
|
||||
proc, or any other which depends on FFI, the compiler will error with the
|
||||
message ``cannot evaluate`` and a dump of the macro's source code, along with a
|
||||
stack trace where the compiler reached before bailing out. We can get around
|
||||
this limitation by using the ``slurp`` proc from the `system module
|
||||
<system.html>`_, which was precisely made for compilation time (just like
|
||||
``gorge`` which executes an external program and captures its output).
|
||||
|
||||
The interesting thing is that our macro does not return a runtime ``TTable``
|
||||
object. Instead, it builds up Nimrod source code into the ``source`` variable.
|
||||
For each line of the configuration file a ``const`` variable will be generated.
|
||||
To avoid conflicts we prefix these variables with ``cfg``. In essence, what the
|
||||
compiler is doing is replacing the line calling the macro with the following
|
||||
snippet of code:
|
||||
|
||||
.. code-block:: nimrod
|
||||
const cfgversion= "1.1"
|
||||
const cfglicenseOwner= "Hyori Lee"
|
||||
const cfglicenseKey= "M1Tl3PjBWO2CC48m"
|
||||
|
||||
You can verify this yourself by adding the line ``echo source`` somewhere at the
|
||||
end of the macro and compiling the program. Another difference is that instead
|
||||
of calling the usual ``quit`` proc to abort (which we could still call) this
|
||||
version calls the ``error`` proc. The ``error`` proc has the same behavior as
|
||||
``quit`` but will also dump the source and file line information where the
|
||||
error happened, making it easier for the programmer to find where compilation
|
||||
failed. In this situation it would point to the line invoking the macro, but
|
||||
**not** the line of ``data.cfg`` we are processing, that's something the macro
|
||||
itself would need to control.
|
||||
|
||||
|
||||
Generating AST by hand
|
||||
++++++++++++++++++++++
|
||||
|
||||
To generate an AST we would need to intimately know the structures used by the
|
||||
Nimrod compiler exposed in the `macros module <macros.html>`_, which at first
|
||||
look seems a daunting task. But we can use a helper shortcut, the ``dumpTree``
|
||||
macro, which is used as a statement macro instead of an expression macro.
|
||||
Since we know that we want to generate a bunch of ``const`` symbols we can
|
||||
create the following source file and compile it to see what the compiler
|
||||
*expects* from us:
|
||||
|
||||
.. code-block:: nimrod
|
||||
import macros
|
||||
|
||||
dumpTree:
|
||||
const cfgversion: string = "1.1"
|
||||
const cfglicenseOwner= "Hyori Lee"
|
||||
const cfglicenseKey= "M1Tl3PjBWO2CC48m"
|
||||
|
||||
During compilation of the source code we should see the following lines in the
|
||||
output (again, since this is a macro, compilation is enough, you don't have to
|
||||
run any binary)::
|
||||
|
||||
StmtList
|
||||
ConstSection
|
||||
ConstDef
|
||||
Ident !"cfgversion"
|
||||
Ident !"string"
|
||||
StrLit 1.1
|
||||
ConstSection
|
||||
ConstDef
|
||||
Ident !"cfglicenseOwner"
|
||||
Empty
|
||||
StrLit Hyori Lee
|
||||
ConstSection
|
||||
ConstDef
|
||||
Ident !"cfglicenseKey"
|
||||
Empty
|
||||
StrLit M1Tl3PjBWO2CC48m
|
||||
|
||||
With this output we have a better idea of what kind of input the compiler
|
||||
expects. We need to generate a list of statements. For each constant the source
|
||||
code generates a ``ConstSection`` and a ``ConstDef``. If we were to move all
|
||||
the constants to a single ``const`` block we would see only a single
|
||||
``ConstSection`` with three children.
|
||||
|
||||
Maybe you didn't notice, but in the ``dumpTree`` example the first constant
|
||||
explicitly specifies the type of the constant. That's why in the tree output
|
||||
the two last constants have their second child ``Empty`` but the first has a
|
||||
string identifier. So basically a ``const`` definition is made up from an
|
||||
identifier, optionally a type (can be an *empty* node) and the value. Armed
|
||||
with this knowledge, let's look at the finished version of the AST building
|
||||
macro:
|
||||
|
||||
.. code-block:: nimrod
|
||||
import macros, strutils
|
||||
|
||||
macro readCfgAndBuildAST(cfgFilename: string): stmt =
|
||||
let
|
||||
inputString = slurp(cfgFilename.strVal)
|
||||
|
||||
result = newNimNode(nnkStmtList)
|
||||
for line in inputString.splitLines:
|
||||
# Ignore empty lines
|
||||
if line.len < 1: continue
|
||||
var chunks = split(line, ',')
|
||||
if chunks.len != 2:
|
||||
error("Input needs comma split values, got: " & line)
|
||||
var
|
||||
section = newNimNode(nnkConstSection)
|
||||
constDef = newNimNode(nnkConstDef)
|
||||
constDef.add(newIdentNode("cfg" & chunks[0]))
|
||||
constDef.add(newEmptyNode())
|
||||
constDef.add(newStrLitNode(chunks[1]))
|
||||
section.add(constDef)
|
||||
result.add(section)
|
||||
|
||||
if result.len < 1: error("Input file empty!")
|
||||
|
||||
readCfgAndBuildAST("data.cfg")
|
||||
|
||||
when isMainModule:
|
||||
echo cfglicenseOwner
|
||||
echo cfglicenseKey
|
||||
echo cfgversion
|
||||
|
||||
Since we are building on the previous example generating source code, we will
|
||||
only mention the differences to it. Instead of creating a temporary ``string``
|
||||
variable and writing into it source code as if it were written *by hand*, we
|
||||
use the ``result`` variable directly and create a statement list node
|
||||
(``nnkStmtList``) which will hold our children.
|
||||
|
||||
For each input line we have to create a constant definition (``nnkConstDef``)
|
||||
and wrap it inside a constant section (``nnkConstSection``). Once these
|
||||
variables are created, we fill them hierarchically like the previous AST dump
|
||||
tree showed: the constant definition is a child of the section definition, and
|
||||
the constant definition has an identifier node, an empty node (we let the
|
||||
compiler figure out the type), and a string literal with the value.
|
||||
|
||||
A last tip when writing a macro: if you are not sure the AST you are building
|
||||
looks ok, you may be tempted to use the ``dumpTree`` macro. But you can't use
|
||||
it *inside* the macro you are writing/debugging. Instead ``echo`` the string
|
||||
generated by ``treeRepr``. If at the end of this example you add ``echo
|
||||
treeRepr(result)`` you should get the same output as using the ``dumpTree``
|
||||
macro, but of course you can call that at any point of the macro where you
|
||||
might be having troubles.
|
||||
|
||||
@@ -17,11 +17,37 @@
|
||||
##
|
||||
## echo loadHtml("mydirty.html")
|
||||
##
|
||||
##
|
||||
## Every tag in the resulting tree is in lower case.
|
||||
##
|
||||
## **Note:** The resulting ``PXmlNode`` already uses the ``clientData`` field,
|
||||
## so it cannot be used by clients of this library.
|
||||
##
|
||||
## Example: Transforming hyperlinks
|
||||
## ================================
|
||||
##
|
||||
## This code demonstrates how you can iterate over all the tags in an HTML file
|
||||
## and write back the modified version. In this case we look for hyperlinks
|
||||
## ending with the extension ``.rst`` and convert them to ``.html``.
|
||||
##
|
||||
## .. code-block:: nimrod
|
||||
##
|
||||
## import htmlparser
|
||||
## import xmltree # To use '$' for PXmlNode
|
||||
## import strtabs # To access PXmlAttributes
|
||||
## import os # To use splitFile
|
||||
## import strutils # To use cmpIgnoreCase
|
||||
##
|
||||
## proc transformHyperlinks() =
|
||||
## let html = loadHTML("input.html")
|
||||
##
|
||||
## for a in html.findAll("a"):
|
||||
## let href = a.attrs["href"]
|
||||
## if not href.isNil:
|
||||
## let (dir, filename, ext) = splitFile(href)
|
||||
## if cmpIgnoreCase(ext, ".rst") == 0:
|
||||
## a.attrs["href"] = dir / filename & ".html"
|
||||
##
|
||||
## writeFile("output.html", $html)
|
||||
|
||||
import strutils, streams, parsexml, xmltree, unicode, strtabs
|
||||
|
||||
@@ -528,7 +554,7 @@ proc parseHtml*(s: PStream, filename: string,
|
||||
## parses the XML from stream `s` and returns a ``PXmlNode``. Every
|
||||
## occurred parsing error is added to the `errors` sequence.
|
||||
var x: TXmlParser
|
||||
open(x, s, filename, {reportComments})
|
||||
open(x, s, filename, {reportComments, reportWhitespace})
|
||||
next(x)
|
||||
# skip the DOCTYPE:
|
||||
if x.kind == xmlSpecial: next(x)
|
||||
|
||||
Reference in New Issue
Block a user