diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim
old mode 100644
new mode 100755
diff --git a/lib/pure/httpserver.nim b/lib/pure/httpserver.nim
old mode 100644
new mode 100755
diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim
index a209e8be0c..598ae6c686 100755
--- a/lib/pure/parsexml.nim
+++ b/lib/pure/parsexml.nim
@@ -196,6 +196,12 @@ proc errorMsgExpected*(my: TXmlParser, tag: string): string =
## other error messages
result = "$1($2, $3) Error: $4" % [
my.filename, $getLine(my), $getColumn(my), "<$1> expected" % tag]
+
+proc errorMsg*(my: TXmlParser, msg: string): string =
+  ## builds an error message carrying the text `msg`, laid out as
+  ## ``filename(line, column) Error: msg`` like the other error messages
+  result = "$1($2, $3) Error: $4" % [my.filename, $getLine(my),
+                                      $getColumn(my), msg]
proc markError(my: var TXmlParser, kind: TXmlError) {.inline.} =
my.err = kind
diff --git a/lib/pure/unidecode/gen.py b/lib/pure/unidecode/gen.py
old mode 100644
new mode 100755
diff --git a/lib/pure/unidecode/unidecode.dat b/lib/pure/unidecode/unidecode.dat
old mode 100644
new mode 100755
diff --git a/lib/pure/unidecode/unidecode.nim b/lib/pure/unidecode/unidecode.nim
old mode 100644
new mode 100755
diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim
old mode 100644
new mode 100755
diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim
old mode 100644
new mode 100755
diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim
old mode 100644
new mode 100755
index aeec842d7f..005969fc4b
--- a/lib/pure/xmltree.nim
+++ b/lib/pure/xmltree.nim
@@ -18,13 +18,14 @@ type
xnText, ## a text element
xnElement, ## an element with 0 or more children
xnCData, ## a CDATA node
+ xnEntity, ## an entity (like ``&thing;``)
xnComment ## an XML comment
PXmlAttributes* = PStringTable ## an alias for a string to string mapping
TXmlNode {.pure, final, acyclic.} = object
case k: TXmlNodeKind
- of xnText, xnComment, xnCData:
+ of xnText, xnComment, xnCData, xnEntity:
fText: string
of xnElement:
fTag: string
@@ -59,10 +60,15 @@ proc newCData*(cdata: string): PXmlNode =
result = newXmlNode(xnCData)
result.fText = cdata
+proc newEntity*(entity: string): PXmlNode =
+  ## creates an ``xnEntity`` node; `entity` is the bare name (no ``&`` or ``;``).
+  result = newXmlNode(xnEntity)
+  result.fText = entity
+
proc text*(n: PXmlNode): string {.inline.} =
- ## gets the associated text with the node `n`. `n` can be a CDATA, Text
- ## or comment node.
- assert n.k in {xnText, xnComment, xnCData}
+ ## gets the text associated with the node `n`. `n` has to be a CDATA, text,
+ ## comment, or entity node (this is checked by an assertion).
+ assert n.k in {xnText, xnComment, xnCData, xnEntity}
result = n.fText
proc tag*(n: PXmlNode): string {.inline.} =
@@ -181,6 +187,10 @@ proc add*(result: var string, n: PXmlNode, indent = 0, indWidth = 2) =
result.add("")
+ of xnEntity:
+ result.add('&')
+ result.add(n.fText)
+ result.add(';')
proc `$`*(n: PXmlNode): string =
## converts `n` into its string representation.
diff --git a/lib/pure/xmltreeparser.nim b/lib/pure/xmltreeparser.nim
old mode 100644
new mode 100755
index 5a48f9e8b3..b7a9ba54a2
--- a/lib/pure/xmltreeparser.nim
+++ b/lib/pure/xmltreeparser.nim
@@ -9,11 +9,87 @@
## This module parses an XML document and creates its XML tree representation.
-import streams, parsexml, xmltree
+import streams, parsexml, strtabs, xmltree, hxmlcommon
+type
+ EInvalidXml* = object of E_Base ## exception raised when the XML is invalid
+ errors*: seq[string] ## all parsing errors that were detected
-proc parse*(x: var TXmlParser, father: PXmlNode) =
+proc raiseInvalidXml(errors: seq[string]) =
+  # raises ``EInvalidXml``: the first entry of `errors` becomes the exception
+  # message and the complete list is attached to the exception object
+  var exc: ref EInvalidXml
+  new(exc)
+  exc.errors = errors
+  exc.msg = errors[0]
+  raise exc
+
+proc addNode(father, son: PXmlNode) =
+  # appends `son` to `father`; a nil `son` is ignored
+  if not isNil(son): father.add(son)
+
+proc parse*(x: var TXmlParser, errors: var seq[string]): PXmlNode =
+ case x.kind
+ of xmlComment:
+ result = newComment(x.text)
+ next(x)
+ of xmlCharData, xmlWhitespace:
+ result = newText(x.text)
+ next(x)
+ of xmlPI, xmlSpecial:
+ # we just ignore processing instructions for now
+ next(x)
+ of xmlError:
+ errors.add(errorMsg(x))
+ next(x)
+ of xmlElementStart: ## ````
+ result = newElement(x.elementName)
+ next(x)
+ while true:
+ case x.kind
+ of xmlElementEnd:
+ if x.elementName == result.tag:
+ next(x)
+ else:
+ errors.add(errorMsg(x, "$1> expected" % result.tag))
+ # do not skip it here!
+ break
+ of xmlEof:
+ errors.add(errorMsg(x, "$1> expected" % result.tag))
+ break
+ else:
+ result.addNode(parse(x, errors))
+ of xmlElementEnd: ## ````
+ errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName))
+ of xmlElementOpen: ## ``' expected" % result.tag))
+ next(x)
+ break
+ of xmlAttribute, xmlElementClose:
+ errors.add(errorMsg(x, " expected"))
+ next(x)
+ of xmlCData:
+ result = newCData(x.charData)
+ next(x)
+ of xmlEntity:
+ ## &entity;
+ ## XXX To implement!
+ next(x)
+ of xmlEof: nil
proc parseXml*(s: PStream, filename: string,
errors: var seq[string]): PXmlNode =
@@ -21,27 +97,31 @@ proc parseXml*(s: PStream, filename: string,
## occured parsing error is added to the `errors` sequence.
var x: TXmlParser
open(x, s, filename, {reportComments})
-
- result = newElement("html")
while true:
x.next()
case x.kind
- of xmlWhitespace: nil # just skip it
- of xmlComment:
- result.add(newComment(x.text))
-
+ of xmlElementOpen, xmlElementStart:
+ result = parse(x, errors)
+ break
+ of xmlComment, xmlWhitespace: nil # just skip it
+ of xmlError:
+ errors.add(errorMsg(x))
+ else:
+ errors.add(errorMsg(x, " expected"))
+ break
close(x)
proc parseXml*(s: PStream): PXmlNode =
## parses the XTML from stream `s` and returns a ``PXmlNode``. All parsing
- ## errors are ignored.
+ ## errors are turned into an ``EInvalidXml`` exception.
var errors: seq[string] = @[]
result = parseXml(s, "unknown_html_doc", errors)
+ if errors.len > 0: raiseInvalidXml(errors)
proc loadXml*(path: string, reportErrors = false): PXmlNode =
## Loads and parses XML from file specified by ``path``, and returns
## a ``PXmlNode``. If `reportErrors` is true, the parsing errors are
- ## ``echo``ed.
+ ## ``echo``ed, otherwise an exception is thrown.
var s = newFileStream(path, fmRead)
if s == nil: raise newException(EIO, "Unable to read file: " & path)
@@ -49,4 +129,6 @@ proc loadXml*(path: string, reportErrors = false): PXmlNode =
result = parseXml(s, path, errors)
if reportErrors:
for msg in items(errors): echo(msg)
+ elif errors.len > 0:
+ raiseInvalidXml(errors)