diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim old mode 100644 new mode 100755 diff --git a/lib/pure/httpserver.nim b/lib/pure/httpserver.nim old mode 100644 new mode 100755 diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim index a209e8be0c..598ae6c686 100755 --- a/lib/pure/parsexml.nim +++ b/lib/pure/parsexml.nim @@ -196,6 +196,12 @@ proc errorMsgExpected*(my: TXmlParser, tag: string): string = ## other error messages result = "$1($2, $3) Error: $4" % [ my.filename, $getLine(my), $getColumn(my), "<$1> expected" % tag] + +proc errorMsg*(my: TXmlParser, msg: string): string = + ## returns an error message with text `msg` in the same format as the + ## other error messages + result = "$1($2, $3) Error: $4" % [ + my.filename, $getLine(my), $getColumn(my), msg] proc markError(my: var TXmlParser, kind: TXmlError) {.inline.} = my.err = kind diff --git a/lib/pure/unidecode/gen.py b/lib/pure/unidecode/gen.py old mode 100644 new mode 100755 diff --git a/lib/pure/unidecode/unidecode.dat b/lib/pure/unidecode/unidecode.dat old mode 100644 new mode 100755 diff --git a/lib/pure/unidecode/unidecode.nim b/lib/pure/unidecode/unidecode.nim old mode 100644 new mode 100755 diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim old mode 100644 new mode 100755 diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim old mode 100644 new mode 100755 diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim old mode 100644 new mode 100755 index aeec842d7f..005969fc4b --- a/lib/pure/xmltree.nim +++ b/lib/pure/xmltree.nim @@ -18,13 +18,14 @@ type xnText, ## a text element xnElement, ## an element with 0 or more children xnCData, ## a CDATA node + xnEntity, ## an entity (like ``&thing;``) xnComment ## an XML comment PXmlAttributes* = PStringTable ## an alias for a string to string mapping TXmlNode {.pure, final, acyclic.} = object case k: TXmlNodeKind - of xnText, xnComment, xnCData: + of xnText, xnComment, xnCData, xnEntity: fText: string of xnElement: fTag: 
string @@ -59,10 +60,15 @@ proc newCData*(cdata: string): PXmlNode = result = newXmlNode(xnCData) result.fText = cdata +proc newEntity*(entity: string): PXmlNode = + ## creates a new ``PXmlNode`` of kind ``xnEntity`` with the text `entity`. + result = newXmlNode(xnEntity) + result.fText = entity + proc text*(n: PXmlNode): string {.inline.} = - ## gets the associated text with the node `n`. `n` can be a CDATA, Text - ## or comment node. - assert n.k in {xnText, xnComment, xnCData} + ## gets the associated text with the node `n`. `n` can be a CDATA, Text, + ## comment, or entity node. + assert n.k in {xnText, xnComment, xnCData, xnEntity} result = n.fText proc tag*(n: PXmlNode): string {.inline.} = @@ -181,6 +187,10 @@ proc add*(result: var string, n: PXmlNode, indent = 0, indWidth = 2) = result.add("") + of xnEntity: + result.add('&') + result.add(n.fText) + result.add(';') proc `$`*(n: PXmlNode): string = ## converts `n` into its string representation. diff --git a/lib/pure/xmltreeparser.nim b/lib/pure/xmltreeparser.nim old mode 100644 new mode 100755 index 5a48f9e8b3..b7a9ba54a2 --- a/lib/pure/xmltreeparser.nim +++ b/lib/pure/xmltreeparser.nim @@ -9,11 +9,87 @@ ## This module parses an XML document and creates its XML tree representation. 
-import streams, parsexml, xmltree +import streams, parsexml, strtabs, xmltree, hxmlcommon +type + EInvalidXml* = object of E_Base ## exception that is raised for invalid XML + errors*: seq[string] ## all detected parsing errors -proc parse*(x: var TXmlParser, father: PXmlNode) = +proc raiseInvalidXml(errors: seq[string]) = + var e: ref EInvalidXml + new(e) + e.msg = errors[0] + e.errors = errors + raise e + +proc addNode(father, son: PXmlNode) = + if son != nil: add(father, son) + +proc parse*(x: var TXmlParser, errors: var seq[string]): PXmlNode = + case x.kind + of xmlComment: + result = newComment(x.text) + next(x) + of xmlCharData, xmlWhitespace: + result = newText(x.text) + next(x) + of xmlPI, xmlSpecial: + # we just ignore processing instructions for now + next(x) + of xmlError: + errors.add(errorMsg(x)) + next(x) + of xmlElementStart: ## ```` + result = newElement(x.elementName) + next(x) + while true: + case x.kind + of xmlElementEnd: + if x.elementName == result.tag: + next(x) + else: + errors.add(errorMsg(x, " expected" % result.tag)) + # do not skip it here! + break + of xmlEof: + errors.add(errorMsg(x, " expected" % result.tag)) + break + else: + result.addNode(parse(x, errors)) + of xmlElementEnd: ## ```` + errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName)) + of xmlElementOpen: ## ``' expected" % result.tag)) + next(x) + break + of xmlAttribute, xmlElementClose: + errors.add(errorMsg(x, " expected")) + next(x) + of xmlCData: + result = newCData(x.charData) + next(x) + of xmlEntity: + ## &entity; + ## XXX To implement! + next(x) + of xmlEof: nil proc parseXml*(s: PStream, filename: string, errors: var seq[string]): PXmlNode = @@ -21,27 +97,31 @@ proc parseXml*(s: PStream, filename: string, ## occured parsing error is added to the `errors` sequence. 
var x: TXmlParser open(x, s, filename, {reportComments}) - - result = newElement("html") while true: x.next() case x.kind - of xmlWhitespace: nil # just skip it - of xmlComment: - result.add(newComment(x.text)) - + of xmlElementOpen, xmlElementStart: + result = parse(x, errors) + break + of xmlComment, xmlWhitespace: nil # just skip it + of xmlError: + errors.add(errorMsg(x)) + else: + errors.add(errorMsg(x, " expected")) + break close(x) proc parseXml*(s: PStream): PXmlNode = ## parses the XTML from stream `s` and returns a ``PXmlNode``. All parsing - ## errors are ignored. + ## errors are turned into an ``EInvalidXml`` exception. var errors: seq[string] = @[] result = parseXml(s, "unknown_html_doc", errors) + if errors.len > 0: raiseInvalidXml(errors) proc loadXml*(path: string, reportErrors = false): PXmlNode = ## Loads and parses XML from file specified by ``path``, and returns ## a ``PXmlNode``. If `reportErrors` is true, the parsing errors are - ## ``echo``ed. + ## ``echo``ed, otherwise an exception is thrown. var s = newFileStream(path, fmRead) if s == nil: raise newException(EIO, "Unable to read file: " & path) @@ -49,4 +129,6 @@ proc loadXml*(path: string, reportErrors = false): PXmlNode = result = parseXml(s, path, errors) if reportErrors: for msg in items(errors): echo(msg) + elif errors.len > 0: + raiseInvalidXml(errors)