further progress on the new XML processing modules

This commit is contained in:
Andreas Rumpf
2010-02-12 00:03:18 +01:00
parent f721ddd75b
commit 16c0beb27c
10 changed files with 112 additions and 14 deletions

0
lib/pure/htmlparser.nim Normal file → Executable file
View File

0
lib/pure/httpserver.nim Normal file → Executable file
View File

View File

@@ -196,6 +196,12 @@ proc errorMsgExpected*(my: TXmlParser, tag: string): string =
## other error messages
result = "$1($2, $3) Error: $4" % [
my.filename, $getLine(my), $getColumn(my), "<$1> expected" % tag]
proc errorMsg*(my: TXmlParser, msg: string): string =
  ## Builds an error message that carries the text `msg`. The layout is
  ## ``filename(line, column) Error: msg``, i.e. the same format that the
  ## other error messages use.
  var parts = [my.filename, $getLine(my), $getColumn(my), msg]
  result = "$1($2, $3) Error: $4" % parts
proc markError(my: var TXmlParser, kind: TXmlError) {.inline.} =
my.err = kind

0
lib/pure/unidecode/gen.py Normal file → Executable file
View File

0
lib/pure/unidecode/unidecode.dat Normal file → Executable file
View File

0
lib/pure/unidecode/unidecode.nim Normal file → Executable file
View File

0
lib/pure/xmldom.nim Normal file → Executable file
View File

0
lib/pure/xmldomparser.nim Normal file → Executable file
View File

18
lib/pure/xmltree.nim Normal file → Executable file
View File

@@ -18,13 +18,14 @@ type
xnText, ## a text element
xnElement, ## an element with 0 or more children
xnCData, ## a CDATA node
xnEntity, ## an entity (like ``&thing;``)
xnComment ## an XML comment
PXmlAttributes* = PStringTable ## an alias for a string to string mapping
TXmlNode {.pure, final, acyclic.} = object
case k: TXmlNodeKind
of xnText, xnComment, xnCData:
of xnText, xnComment, xnCData, xnEntity:
fText: string
of xnElement:
fTag: string
@@ -59,10 +60,15 @@ proc newCData*(cdata: string): PXmlNode =
result = newXmlNode(xnCData)
result.fText = cdata
proc newEntity*(entity: string): PXmlNode =
  ## creates a new ``PXmlNode`` of kind ``xnEntity`` with the text `entity`.
  ## `entity` is stored as given; when the node is rendered it is wrapped
  ## as ``&entity;``.
  # The node kind has to be ``xnEntity`` (not ``xnCData``) and the stored
  # text has to come from the `entity` parameter.
  result = newXmlNode(xnEntity)
  result.fText = entity
proc text*(n: PXmlNode): string {.inline.} =
  ## gets the associated text with the node `n`. `n` can be a CDATA, Text,
  ## comment, or entity node.
  # Entity nodes keep their name in `fText` as well, so they are accepted
  # here alongside the other text-carrying kinds.
  assert n.k in {xnText, xnComment, xnCData, xnEntity}
  result = n.fText
proc tag*(n: PXmlNode): string {.inline.} =
@@ -181,6 +187,10 @@ proc add*(result: var string, n: PXmlNode, indent = 0, indWidth = 2) =
result.add("<![CDATA[")
result.add(n.fText)
result.add("]]>")
of xnEntity:
result.add('&')
result.add(n.fText)
result.add(';')
proc `$`*(n: PXmlNode): string =
## converts `n` into its string representation.

102
lib/pure/xmltreeparser.nim Normal file → Executable file
View File

@@ -9,11 +9,87 @@
## This module parses an XML document and creates its XML tree representation.
import streams, parsexml, xmltree
import streams, parsexml, strtabs, xmltree, hxmlcommon
type
  EInvalidXml* = object of E_Base ## raised when the parsed XML is invalid
    errors*: seq[string]          ## every parsing error that was detected
proc parse*(x: var TXmlParser, father: PXmlNode) =
proc raiseInvalidXml(errors: seq[string]) =
  ## Raises an ``EInvalidXml`` exception. The first entry of `errors`
  ## becomes the exception message and the complete list is stored in the
  ## exception's ``errors`` field. `errors` must not be empty because its
  ## first entry is read unconditionally.
  var exc: ref EInvalidXml
  new(exc)
  exc.errors = errors
  exc.msg = errors[0]
  raise exc
proc addNode(father, son: PXmlNode) =
  ## Appends `son` to `father`. A ``nil`` `son` is silently ignored.
  if son == nil: return
  father.add(son)
# Forward declaration: `parse` and `untilElementEnd` call each other.
proc parse*(x: var TXmlParser, errors: var seq[string]): PXmlNode

proc untilElementEnd(x: var TXmlParser, father: PXmlNode,
                     errors: var seq[string]) =
  ## Parses the children of `father` and appends them to it until the
  ## matching end tag or the end of the input is reached. A mismatching end
  ## tag or a premature end of input is reported in `errors`.
  while true:
    case x.kind
    of xmlElementEnd:
      if x.elementName == father.tag:
        next(x)
      else:
        errors.add(errorMsg(x, "</$1> expected" % father.tag))
        # do not skip it here! The token is left in place for the caller.
      break
    of xmlEof:
      errors.add(errorMsg(x, "</$1> expected" % father.tag))
      break
    else:
      father.addNode(parse(x, errors))

proc parse*(x: var TXmlParser, errors: var seq[string]): PXmlNode =
  ## Parses the construct that starts at the current token of `x` and
  ## returns the node built from it. The result is ``nil`` for tokens that
  ## do not produce a node (processing instructions, errors, misplaced
  ## tokens, end of input). Every detected problem is appended to `errors`.
  case x.kind
  of xmlComment:
    result = newComment(x.text)
    next(x)
  of xmlCharData, xmlWhitespace:
    result = newText(x.text)
    next(x)
  of xmlPI, xmlSpecial:
    # we just ignore processing instructions for now
    next(x)
  of xmlError:
    errors.add(errorMsg(x))
    next(x)
  of xmlElementStart:
    # ``<elem>``
    result = newElement(x.elementName)
    next(x)
    untilElementEnd(x, result, errors)
  of xmlElementEnd:
    # ``</elem>`` without a matching start tag; the token is not consumed
    errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName))
  of xmlElementOpen:
    # ``<elem`` followed by attributes
    result = newElement(x.elementName)
    next(x)
    result.attr = newStringTable()
    while true:
      case x.kind
      of xmlAttribute:
        result.attr[x.attrKey] = x.attrValue
        next(x)
      of xmlElementClose:
        next(x)
        break
      of xmlError:
        errors.add(errorMsg(x))
        next(x)
        break
      else:
        errors.add(errorMsg(x, "'>' expected"))
        next(x)
        break
    # an element with attributes has children and an end tag as well
    untilElementEnd(x, result, errors)
  of xmlAttribute, xmlElementClose:
    errors.add(errorMsg(x, "<some_tag> expected"))
    next(x)
  of xmlCData:
    result = newCData(x.charData)
    next(x)
  of xmlEntity:
    # ``&entity;`` is kept as an entity node instead of being dropped
    result = newEntity(x.entityName)
    next(x)
  of xmlEof: nil
proc parseXml*(s: PStream, filename: string,
               errors: var seq[string]): PXmlNode =
  ## parses the XML from stream `s` and returns a ``PXmlNode``. Every
  ## parsing error that occurred is added to the `errors` sequence.
  ## `filename` is handed to the underlying parser. The result is ``nil``
  ## if no element was found.
  var x: TXmlParser
  open(x, s, filename, {reportComments})
  while true:
    x.next()
    case x.kind
    of xmlElementOpen, xmlElementStart:
      # the first element is the document root; parse it and stop
      result = parse(x, errors)
      break
    of xmlComment, xmlWhitespace: nil # just skip it
    of xmlError:
      errors.add(errorMsg(x))
    else:
      errors.add(errorMsg(x, "<some_tag> expected"))
      break
  close(x)
proc parseXml*(s: PStream): PXmlNode =
  ## Parses the XML read from stream `s` and returns the resulting
  ## ``PXmlNode``. If any parsing error is detected, an ``EInvalidXml``
  ## exception holding all error messages is raised.
  var problems: seq[string] = @[]
  result = parseXml(s, "unknown_html_doc", problems)
  if len(problems) != 0: raiseInvalidXml(problems)
proc loadXml*(path: string, reportErrors = false): PXmlNode =
## Loads and parses XML from file specified by ``path``, and returns
## a ``PXmlNode``. If `reportErrors` is true, the parsing errors are
## ``echo``ed.
## ``echo``ed, otherwise an exception is thrown.
var s = newFileStream(path, fmRead)
if s == nil: raise newException(EIO, "Unable to read file: " & path)
@@ -49,4 +129,6 @@ proc loadXml*(path: string, reportErrors = false): PXmlNode =
result = parseXml(s, path, errors)
if reportErrors:
for msg in items(errors): echo(msg)
elif errors.len > 0:
raiseInvalidXMl(errors)