mirror of
https://github.com/nim-lang/Nim.git
synced 2026-01-08 14:03:23 +00:00
new XML modules
This commit is contained in:
11
doc/lib.txt
11
doc/lib.txt
@@ -172,7 +172,16 @@ XML Processing
|
||||
This module implements the XML DOM Level 2.
|
||||
|
||||
* `xmldomparser <xmldomparser.html>`_
|
||||
This module parses a XML Document into a XML DOM Document representation.
|
||||
This module parses an XML Document into an XML DOM Document representation.
|
||||
|
||||
* `xmltree <xmltree.html>`_
|
||||
A simple XML tree. More efficient and simpler than the DOM.
|
||||
|
||||
* `xmltreeparser <xmltreeparser.html>`_
|
||||
This module parses an XML document and creates its XML tree representation.
|
||||
|
||||
* `htmlparser <htmlparser.html>`_
|
||||
This module parses an HTML document and creates its XML tree representation.
|
||||
|
||||
|
||||
Code generation
|
||||
|
||||
247
lib/pure/htmlparser.nim
Normal file
247
lib/pure/htmlparser.nim
Normal file
@@ -0,0 +1,247 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2010 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## This module parses an HTML document and creates its XML tree representation.
|
||||
## It is supposed to handle the *wild* HTML the real world uses.
|
||||
##
|
||||
## It can be used to parse a wild HTML document and output it as a valid XHTML
|
||||
## document (if you are lucky):
|
||||
##
|
||||
## .. code-block:: nimrod
|
||||
##
|
||||
## echo loadHtml("mydirty.html")
|
||||
##
|
||||
##
|
||||
## Every tag in the resulting tree is in lower case.
|
||||
##
|
||||
## **Note:** The resulting ``PXmlNode``s already use the ``clientData`` field,
|
||||
## so it cannot be used by clients of this library.
|
||||
|
||||
import streams, parsexml, xmltree
|
||||
|
||||
type
  THtmlTag* = enum ## list of all supported HTML tags; the order is always
                   ## alphabetical (byte-wise, as produced by ``cmp``)
    tagUnknown,    ## unknown HTML element
    tagA,          ## the HTML ``a`` element
    tagAcronym,    ## the HTML ``acronym`` element
    tagAddress,    ## the HTML ``address`` element
    tagArea,       ## the HTML ``area`` element
    tagB,          ## the HTML ``b`` element
    tagBase,       ## the HTML ``base`` element
    tagBig,        ## the HTML ``big`` element
    tagBlockquote, ## the HTML ``blockquote`` element
    tagBody,       ## the HTML ``body`` element
    tagBr,         ## the HTML ``br`` element
    tagButton,     ## the HTML ``button`` element
    tagCaption,    ## the HTML ``caption`` element
    tagCite,       ## the HTML ``cite`` element
    tagCode,       ## the HTML ``code`` element
    tagCol,        ## the HTML ``col`` element
    tagColgroup,   ## the HTML ``colgroup`` element
    tagDd,         ## the HTML ``dd`` element
    tagDel,        ## the HTML ``del`` element
    tagDfn,        ## the HTML ``dfn`` element
    tagDiv,        ## the HTML ``div`` element
    tagDl,         ## the HTML ``dl`` element
    tagDt,         ## the HTML ``dt`` element
    tagEm,         ## the HTML ``em`` element
    tagFieldset,   ## the HTML ``fieldset`` element
    tagForm,       ## the HTML ``form`` element
    tagH1,         ## the HTML ``h1`` element
    tagH2,         ## the HTML ``h2`` element
    tagH3,         ## the HTML ``h3`` element
    tagH4,         ## the HTML ``h4`` element
    tagH5,         ## the HTML ``h5`` element
    tagH6,         ## the HTML ``h6`` element
    tagHead,       ## the HTML ``head`` element
    tagHr,         ## the HTML ``hr`` element
    tagHtml,       ## the HTML ``html`` element
    tagI,          ## the HTML ``i`` element
    tagImg,        ## the HTML ``img`` element
    tagInput,      ## the HTML ``input`` element
    tagIns,        ## the HTML ``ins`` element
    tagKbd,        ## the HTML ``kbd`` element
    tagLabel,      ## the HTML ``label`` element
    tagLegend,     ## the HTML ``legend`` element
    tagLi,         ## the HTML ``li`` element
    tagLink,       ## the HTML ``link`` element
    tagMap,        ## the HTML ``map`` element
    tagMeta,       ## the HTML ``meta`` element
    tagNoscript,   ## the HTML ``noscript`` element
    tagObject,     ## the HTML ``object`` element
    tagOl,         ## the HTML ``ol`` element
    tagOptgroup,   ## the HTML ``optgroup`` element
    tagOption,     ## the HTML ``option`` element
    tagP,          ## the HTML ``p`` element
    tagParam,      ## the HTML ``param`` element
    tagPre,        ## the HTML ``pre`` element
    tagQ,          ## the HTML ``q`` element
    tagSamp,       ## the HTML ``samp`` element
    tagScript,     ## the HTML ``script`` element
    tagSelect,     ## the HTML ``select`` element
    tagSmall,      ## the HTML ``small`` element
    tagSpan,       ## the HTML ``span`` element
    tagStrong,     ## the HTML ``strong`` element
    tagStyle,      ## the HTML ``style`` element
    tagSub,        ## the HTML ``sub`` element
    tagSup,        ## the HTML ``sup`` element
    tagTable,      ## the HTML ``table`` element
    tagTbody,      ## the HTML ``tbody`` element
    tagTd,         ## the HTML ``td`` element
    tagTextarea,   ## the HTML ``textarea`` element
    tagTfoot,      ## the HTML ``tfoot`` element
    tagTh,         ## the HTML ``th`` element
    tagThead,      ## the HTML ``thead`` element
    tagTitle,      ## the HTML ``title`` element
    tagTr,         ## the HTML ``tr`` element
    tagTt,         ## the HTML ``tt`` element
    tagUl,         ## the HTML ``ul`` element
    tagVar         ## the HTML ``var`` element

const
  # Tag names parallel to ``THtmlTag``: entry ``i`` names the tag whose
  # ordinal is ``i + 1`` (``tagUnknown`` has no entry). ``htmlTag`` looks
  # names up with ``binaryStrSearch``, so this array MUST stay sorted
  # according to ``cmp``; note that "hr" sorts before "html".
  tagStrs = [
    "a", "acronym", "address", "area", "b", "base", "big", "blockquote",
    "body", "br", "button", "caption", "cite", "code", "col", "colgroup",
    "dd", "del", "dfn", "div", "dl", "dt", "em", "fieldset",
    "form", "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html",
    "i", "img", "input", "ins", "kbd", "label", "legend", "li", "link",
    "map", "meta", "noscript", "object", "ol", "optgroup", "option",
    "p", "param", "pre", "q", "samp", "script", "select", "small",
    "span", "strong", "style", "sub", "sup", "table", "tbody", "td",
    "textarea", "tfoot", "th", "thead", "title", "tr", "tt", "ul", "var"
  ]
|
||||
|
||||
proc binaryStrSearch(x: openarray[string], y: string): int =
  ## Binary search for `y` in `x`. The halving steps are only meaningful if
  ## `x` is sorted in ascending ``cmp`` order. Returns the index of an entry
  ## equal to `y`, or -1 if the search finds none.
  ## XXX put this into the library somewhere!
  result = -1
  var lo = 0
  var hi = len(x) - 1
  while lo <= hi:
    var probe = (lo + hi) div 2
    var order = cmp(x[probe], y)
    if order == 0:
      return probe
    elif order < 0:
      # x[probe] sorts before y: continue in the upper half
      lo = probe + 1
    else:
      # x[probe] sorts after y: continue in the lower half
      hi = probe - 1
|
||||
|
||||
proc htmlTag*(n: PXmlNode): THtmlTag =
  ## Returns `n`'s tag as a ``THtmlTag``. The result of the lookup is cached
  ## in ``n.clientData``, but a call can still be more expensive than
  ## comparing ``tag`` directly to a string.
  var cached = n.clientData
  if cached == 0:
    # Index into tagStrs plus one. A failed search (-1) becomes 0, which is
    # tagUnknown; since 0 also means "not looked up yet", unknown tags are
    # searched again on every call.
    cached = binaryStrSearch(tagStrs, n.tag)+1
    n.clientData = cached
  result = THtmlTag(cached)
|
||||
|
||||
proc parseElement(x: var TXmlParser, doc: var PDocument): PElement =
|
||||
var n = doc.createElement("")
|
||||
|
||||
while True:
|
||||
case x.kind()
|
||||
of xmlEof:
|
||||
break
|
||||
of xmlElementStart:
|
||||
if n.tagName() != "":
|
||||
n.appendChild(parseElement(x, doc))
|
||||
else:
|
||||
n = doc.createElement(x.elementName)
|
||||
of xmlElementOpen:
|
||||
if n.tagName() != "":
|
||||
n.appendChild(parseElement(x, doc))
|
||||
else:
|
||||
if x.elementName.contains(':'):
|
||||
#TODO: NamespaceURI
|
||||
n = doc.createElementNS("nil", x.elementName)
|
||||
else:
|
||||
n = doc.createElement(x.elementName)
|
||||
|
||||
of xmlElementEnd:
|
||||
if x.elementName == n.nodeName:
|
||||
# n.normalize() # Remove any whitespace etc.
|
||||
return n
|
||||
else: #The wrong element is ended
|
||||
raise newException(EMismatchedTag, "Mismatched tag at line " &
|
||||
$x.getLine() & " column " & $x.getColumn)
|
||||
|
||||
of xmlCharData:
|
||||
n.appendChild(parseText(x, doc))
|
||||
of xmlAttribute:
|
||||
if x.attrKey.contains(':'):
|
||||
#TODO: NamespaceURI
|
||||
n.setAttributeNS("nil", x.attrKey, x.attrValue)
|
||||
else:
|
||||
n.setAttribute(x.attrKey, x.attrValue)
|
||||
of xmlCData:
|
||||
n.appendChild(doc.createCDATASection(x.charData()))
|
||||
of xmlComment:
|
||||
n.appendChild(doc.createComment(x.charData()))
|
||||
of xmlPI:
|
||||
n.appendChild(doc.createProcessingInstruction(x.PIName(), x.PIRest()))
|
||||
|
||||
of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
|
||||
# Unused 'events'
|
||||
|
||||
else:
|
||||
raise newException(EParserError, "Unexpected XML Parser event")
|
||||
x.next()
|
||||
|
||||
raise newException(EMismatchedTag,
|
||||
"Mismatched tag at line " & $x.getLine() & " column " & $x.getColumn)
|
||||
|
||||
|
||||
proc parse*(x: var TXmlParser, father: PXmlNode) =
|
||||
|
||||
|
||||
proc parseHtml*(s: PStream, filename: string,
|
||||
errors: var seq[string]): PXmlNode =
|
||||
## parses the HTML from stream `s` and returns a ``PXmlNode``. Every
|
||||
## occurred parsing error is added to the `errors` sequence.
|
||||
var x: TXmlParser
|
||||
open(x, s, filename, {reportComments})
|
||||
|
||||
result = newElement("html")
|
||||
while true:
|
||||
x.next()
|
||||
case x.kind
|
||||
of xmlWhitespace: nil # just skip it
|
||||
of xmlComment:
|
||||
result.add(newComment(x.text))
|
||||
|
||||
while True:
|
||||
x.next()
|
||||
case x.kind
|
||||
of xmlEof: break
|
||||
of xmlElementStart, xmlElementOpen:
|
||||
var el: PElement = parseElement(x, XmlDoc)
|
||||
XmlDoc = dom.createDocument(el)
|
||||
of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
|
||||
# Unused 'events'
|
||||
else:
|
||||
raise newException(EParserError, "Unexpected XML Parser event")
|
||||
close(x)
|
||||
|
||||
proc parseHtml*(s: PStream): PXmlNode =
  ## Parses the HTML from stream `s` and returns a ``PXmlNode``. Parsing
  ## errors are collected into a local sequence that is then discarded.
  var ignored: seq[string] = @[]
  # no real file name is known here, so a placeholder is passed on
  return parseHtml(s, "unknown_html_doc", ignored)
|
||||
|
||||
proc loadHtml*(path: string, reportErrors = false): PXmlNode =
  ## Opens the file at ``path`` for reading, parses it as HTML and returns
  ## the resulting ``PXmlNode``. Raises ``EIO`` if the file cannot be opened.
  ## If `reportErrors` is true, every collected parsing error is written
  ## out with ``echo``.
  var input = newFileStream(path, fmRead)
  if input == nil:
    raise newException(EIO, "Unable to read file: " & path)

  var problems: seq[string] = @[]
  result = parseHtml(input, path, problems)
  if not reportErrors: return
  for msg in items(problems):
    echo(msg)
|
||||
|
||||
@@ -364,7 +364,7 @@ proc parsePI(my: var TXmlParser) =
|
||||
break
|
||||
add(my.b, '?')
|
||||
inc(pos)
|
||||
of '\c':
|
||||
of '\c':
|
||||
# the specification says that CR-LF, CR are to be transformed to LF
|
||||
pos = lexbase.HandleCR(my, pos)
|
||||
buf = my.buf
|
||||
|
||||
@@ -127,13 +127,13 @@ template `=~` *(s: string, pattern: TRegEx): expr =
|
||||
##
|
||||
## if line =~ re"\s*(\w+)\s*\=\s*(\w+)":
|
||||
## # matches a key=value pair:
|
||||
## echo("Key: ", matches[1])
|
||||
## echo("Value: ", matches[2])
|
||||
## echo("Key: ", matches[0])
|
||||
## echo("Value: ", matches[1])
|
||||
## elif line =~ re"\s*(\#.*)":
|
||||
## # matches a comment
|
||||
## # note that the implicit ``matches`` array is different from the
|
||||
## # ``matches`` array of the first branch
|
||||
## echo("comment: ", matches[1])
|
||||
## echo("comment: ", matches[0])
|
||||
## else:
|
||||
## echo("syntax error")
|
||||
##
|
||||
|
||||
@@ -9,33 +9,30 @@
|
||||
|
||||
|
||||
import strutils
|
||||
## This module implements the XML DOM Level 2
|
||||
## This module implements the XML DOM Level 2 Core specification (http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html)
|
||||
|
||||
|
||||
#http://www.w3.org/TR/2000/REC-DOM-Level-2-Core-20001113/core.html
|
||||
#DOMString = String
|
||||
#DOMTimeStamp = int16 ??
|
||||
|
||||
#DECLARATIONS
|
||||
|
||||
#Exceptions
|
||||
type
|
||||
EDOMException* = object of E_Base #Base exception object for all DOM Exceptions
|
||||
EDOMStringSizeErr* = object of EDOMException #If the specified range of text does not fit into a DOMString
|
||||
#Currently not used(Since DOMString is just string)
|
||||
EHierarchyRequestErr* = object of EDOMException #If any node is inserted somewhere it doesn't belong
|
||||
EIndexSizeErr* = object of EDOMException #If index or size is negative, or greater than the allowed value
|
||||
EInuseAttributeErr* = object of EDOMException #If an attempt is made to add an attribute that is already in use elsewhere
|
||||
EInvalidAccessErr* = object of EDOMException #If a parameter or an operation is not supported by the underlying object.
|
||||
EInvalidCharacterErr* = object of EDOMException #This exception is raised when a string parameter contains an illegal character
|
||||
EInvalidModificationErr* = object of EDOMException #If an attempt is made to modify the type of the underlying object.
|
||||
EInvalidStateErr* = object of EDOMException #If an attempt is made to use an object that is not, or is no longer, usable.
|
||||
ENamespaceErr* = object of EDOMException #If an attempt is made to create or change an object in a way which is incorrect with regard to namespaces.
|
||||
ENotFoundErr* = object of EDOMException #If an attempt is made to reference a node in a context where it does not exist
|
||||
ENotSupportedErr* = object of EDOMException #If the implementation does not support the requested type of object or operation.
|
||||
ENoDataAllowedErr* = object of EDOMException #If data is specified for a node which does not support data
|
||||
ENoModificationAllowedErr* = object of EDOMException #If an attempt is made to modify an object where modifications are not allowed
|
||||
ESyntaxErr* = object of EDOMException #If an invalid or illegal string is specified.
|
||||
EWrongDocumentErr* = object of EDOMException #If a node is used in a different document than the one that created it (that doesn't support it)
|
||||
EDOMException* = object of E_Base ## Base exception object for all DOM Exceptions
|
||||
EDOMStringSizeErr* = object of EDOMException ## If the specified range of text does not fit into a DOMString
|
||||
## Currently not used(Since DOMString is just string)
|
||||
EHierarchyRequestErr* = object of EDOMException ## If any node is inserted somewhere it doesn't belong
|
||||
EIndexSizeErr* = object of EDOMException ## If index or size is negative, or greater than the allowed value
|
||||
EInuseAttributeErr* = object of EDOMException ## If an attempt is made to add an attribute that is already in use elsewhere
|
||||
EInvalidAccessErr* = object of EDOMException ## If a parameter or an operation is not supported by the underlying object.
|
||||
EInvalidCharacterErr* = object of EDOMException ## This exception is raised when a string parameter contains an illegal character
|
||||
EInvalidModificationErr* = object of EDOMException ## If an attempt is made to modify the type of the underlying object.
|
||||
EInvalidStateErr* = object of EDOMException ## If an attempt is made to use an object that is not, or is no longer, usable.
|
||||
ENamespaceErr* = object of EDOMException ## If an attempt is made to create or change an object in a way which is incorrect with regard to namespaces.
|
||||
ENotFoundErr* = object of EDOMException ## If an attempt is made to reference a node in a context where it does not exist
|
||||
ENotSupportedErr* = object of EDOMException ## If the implementation does not support the requested type of object or operation.
|
||||
ENoDataAllowedErr* = object of EDOMException ## If data is specified for a node which does not support data
|
||||
ENoModificationAllowedErr* = object of EDOMException ## If an attempt is made to modify an object where modifications are not allowed
|
||||
ESyntaxErr* = object of EDOMException ## If an invalid or illegal string is specified.
|
||||
EWrongDocumentErr* = object of EDOMException ## If a node is used in a different document than the one that created it (that doesn't support it)
|
||||
|
||||
template newException(exceptn, message: expr): expr =
|
||||
block: # open a new scope
|
||||
@@ -65,24 +62,24 @@ type
|
||||
Feature = tuple[name: string, version: string]
|
||||
PDOMImplementation* = ref DOMImplementation
|
||||
DOMImplementation = object
|
||||
Features: seq[Feature] #Read-Only
|
||||
Features: seq[Feature] # Read-Only
|
||||
|
||||
PNode* = ref Node
|
||||
Node = object
|
||||
attributes: seq[PAttr] #Read-only
|
||||
childNodes*: seq[PNode] #Read-only
|
||||
FLocalName: string #Read-only
|
||||
FNamespaceURI: string #Read-only
|
||||
FNodeName: string #Read-only
|
||||
attributes*: seq[PAttr]
|
||||
childNodes*: seq[PNode]
|
||||
FLocalName: string # Read-only
|
||||
FNamespaceURI: string # Read-only
|
||||
FNodeName: string # Read-only
|
||||
nodeValue*: string
|
||||
FNodeType: int #Read-only
|
||||
FOwnerDocument: PDocument #Read-Only
|
||||
FParentNode: PNode #Read-Only
|
||||
FNodeType: int # Read-only
|
||||
FOwnerDocument: PDocument # Read-Only
|
||||
FParentNode: PNode # Read-Only
|
||||
prefix*: string # Setting this should change some values... TODO!
|
||||
|
||||
PElement* = ref Element
|
||||
Element = object of Node
|
||||
FTagName: string #Read-only
|
||||
FTagName: string # Read-only
|
||||
|
||||
PCharacterData = ref CharacterData
|
||||
CharacterData = object of Node
|
||||
@@ -90,15 +87,15 @@ type
|
||||
|
||||
PDocument* = ref Document
|
||||
Document = object of Node
|
||||
FImplementation: PDOMImplementation #Read-only
|
||||
FDocumentElement: PElement #Read-only
|
||||
FImplementation: PDOMImplementation # Read-only
|
||||
FDocumentElement: PElement # Read-only
|
||||
|
||||
PAttr* = ref Attr
|
||||
Attr = object of Node
|
||||
FName: string #Read-only
|
||||
FSpecified: bool #Read-only
|
||||
FName: string # Read-only
|
||||
FSpecified: bool # Read-only
|
||||
value*: string
|
||||
FOwnerElement: PElement #Read-only
|
||||
FOwnerElement: PElement # Read-only
|
||||
|
||||
PDocumentFragment* = ref DocumentFragment
|
||||
DocumentFragment = object of Node
|
||||
@@ -115,18 +112,18 @@ type
|
||||
PProcessingInstruction* = ref ProcessingInstruction
|
||||
ProcessingInstruction = object of Node
|
||||
data*: string
|
||||
FTarget: string #Read-only
|
||||
FTarget: string # Read-only
|
||||
|
||||
#DOMImplementation
|
||||
# DOMImplementation
|
||||
proc getDOM*(): PDOMImplementation =
|
||||
##Returns a DOMImplementation
|
||||
## Returns a DOMImplementation
|
||||
var DOMImpl: PDOMImplementation
|
||||
new(DOMImpl)
|
||||
DOMImpl.Features = @[(name: "core", version: "2.0"), (name: "core", version: "1.0"), (name: "XML", version: "2.0")]
|
||||
return DOMImpl
|
||||
|
||||
proc createDocument*(dom: PDOMImplementation, namespaceURI: string, qualifiedName: string): PDocument =
|
||||
##Creates an XML Document object of the specified type with its document element.
|
||||
## Creates an XML Document object of the specified type with its document element.
|
||||
var doc: PDocument
|
||||
new(doc)
|
||||
doc.FNamespaceURI = namespaceURI
|
||||
@@ -142,8 +139,9 @@ proc createDocument*(dom: PDOMImplementation, namespaceURI: string, qualifiedNam
|
||||
return doc
|
||||
|
||||
proc createDocument*(dom: PDOMImplementation, n: PElement): PDocument =
|
||||
##Creates an XML Document object of the specified type with its document element.
|
||||
#This procedure is not in the specification, it's provided for the parser.
|
||||
## Creates an XML Document object of the specified type with its document element.
|
||||
|
||||
# This procedure is not in the specification, it's provided for the parser.
|
||||
var doc: PDocument
|
||||
new(doc)
|
||||
doc.FDocumentElement = n
|
||||
@@ -153,7 +151,7 @@ proc createDocument*(dom: PDOMImplementation, n: PElement): PDocument =
|
||||
return doc
|
||||
|
||||
proc hasFeature*(dom: PDOMImplementation, feature: string, version: string = ""): bool =
|
||||
##Returns ``true`` if this ``version`` of the DomImplementation implements ``feature``, otherwise ``false``
|
||||
## Returns ``true`` if this ``version`` of the DomImplementation implements ``feature``, otherwise ``false``
|
||||
for iName, iVersion in items(dom.Features):
|
||||
if iName == feature:
|
||||
if version == "":
|
||||
@@ -164,8 +162,8 @@ proc hasFeature*(dom: PDOMImplementation, feature: string, version: string = "")
|
||||
return False
|
||||
|
||||
|
||||
#Document
|
||||
#Attributes
|
||||
# Document
|
||||
# Attributes
|
||||
|
||||
proc implementation*(doc: PDocument): PDOMImplementation =
|
||||
return doc.FImplementation
|
||||
@@ -173,9 +171,9 @@ proc implementation*(doc: PDocument): PDOMImplementation =
|
||||
proc documentElement*(doc: PDocument): PElement =
|
||||
return doc.FDocumentElement
|
||||
|
||||
#Internal procedures
|
||||
# Internal procedures
|
||||
proc findNodes(nl: PNode, name: string): seq[PNode] =
|
||||
#Made for getElementsByTagName
|
||||
# Made for getElementsByTagName
|
||||
var r: seq[PNode] = @[]
|
||||
if nl.childNodes == nil: return @[]
|
||||
if nl.childNodes.len() == 0: return @[]
|
||||
@@ -192,7 +190,7 @@ proc findNodes(nl: PNode, name: string): seq[PNode] =
|
||||
return r
|
||||
|
||||
proc findNodesNS(nl: PNode, namespaceURI: string, localName: string): seq[PNode] =
|
||||
#Made for getElementsByTagNameNS
|
||||
# Made for getElementsByTagNameNS
|
||||
var r: seq[PNode] = @[]
|
||||
if nl.childNodes == nil: return @[]
|
||||
if nl.childNodes.len() == 0: return @[]
|
||||
@@ -211,10 +209,10 @@ proc findNodesNS(nl: PNode, namespaceURI: string, localName: string): seq[PNode]
|
||||
|
||||
#Procedures
|
||||
proc createAttribute*(doc: PDocument, name: string): PAttr =
|
||||
##Creates an Attr of the given name. Note that the Attr instance can then be set on an Element using the setAttributeNode method.
|
||||
##To create an attribute with a qualified name and namespace URI, use the createAttributeNS method.
|
||||
## Creates an Attr of the given name. Note that the Attr instance can then be set on an Element using the setAttributeNode method.
|
||||
## To create an attribute with a qualified name and namespace URI, use the createAttributeNS method.
|
||||
|
||||
#Check if name contains illegal characters
|
||||
# Check if name contains illegal characters
|
||||
if illegalChars in name:
|
||||
raise newException(EInvalidCharacterErr, "Invalid character")
|
||||
|
||||
@@ -230,12 +228,12 @@ proc createAttribute*(doc: PDocument, name: string): PAttr =
|
||||
return AttrNode
|
||||
|
||||
proc createAttributeNS*(doc: PDocument, namespaceURI: string, qualifiedName: string): PAttr =
|
||||
##Creates an attribute of the given qualified name and namespace URI
|
||||
## Creates an attribute of the given qualified name and namespace URI
|
||||
|
||||
#Check if name contains illegal characters
|
||||
# Check if name contains illegal characters
|
||||
if illegalChars in namespaceURI or illegalChars in qualifiedName:
|
||||
raise newException(EInvalidCharacterErr, "Invalid character")
|
||||
#Exceptions
|
||||
# Exceptions
|
||||
if qualifiedName.contains(':'):
|
||||
if namespaceURI == nil or namespaceURI == "":
|
||||
raise newException(ENamespaceErr, "When qualifiedName contains a prefix namespaceURI cannot be nil")
|
||||
@@ -264,17 +262,17 @@ proc createAttributeNS*(doc: PDocument, namespaceURI: string, qualifiedName: str
|
||||
return AttrNode
|
||||
|
||||
proc createCDATASection*(doc: PDocument, data: string): PCDATASection =
|
||||
##Creates a CDATASection node whose value is the specified string.
|
||||
## Creates a CDATASection node whose value is the specified string.
|
||||
var CData: PCDATASection
|
||||
new(CData)
|
||||
CData.data = data
|
||||
CData.nodeValue = data
|
||||
CData.FNodeName = "#text" #Not sure about this, but this is technically a TextNode
|
||||
CData.FNodeName = "#text" # Not sure about this, but this is technically a TextNode
|
||||
CData.FNodeType = CDataSectionNode
|
||||
return CData
|
||||
|
||||
proc createComment*(doc: PDocument, data: string): PComment =
|
||||
##Creates a Comment node given the specified string.
|
||||
## Creates a Comment node given the specified string.
|
||||
var Comm: PComment
|
||||
new(Comm)
|
||||
Comm.data = data
|
||||
@@ -284,15 +282,15 @@ proc createComment*(doc: PDocument, data: string): PComment =
|
||||
return Comm
|
||||
|
||||
proc createDocumentFragment*(doc: PDocument): PDocumentFragment =
|
||||
##Creates an empty DocumentFragment object.
|
||||
## Creates an empty DocumentFragment object.
|
||||
var DF: PDocumentFragment
|
||||
new(DF)
|
||||
return DF
|
||||
|
||||
proc createElement*(doc: PDocument, tagName: string): PElement =
|
||||
##Creates an element of the type specified.
|
||||
## Creates an element of the type specified.
|
||||
|
||||
#Check if name contains illegal characters
|
||||
# Check if name contains illegal characters
|
||||
if illegalChars in tagName:
|
||||
raise newException(EInvalidCharacterErr, "Invalid character")
|
||||
|
||||
@@ -311,7 +309,7 @@ proc createElement*(doc: PDocument, tagName: string): PElement =
|
||||
return elNode
|
||||
|
||||
proc createElementNS*(doc: PDocument, namespaceURI: string, qualifiedName: string): PElement =
|
||||
##Creates an element of the given qualified name and namespace URI.
|
||||
## Creates an element of the given qualified name and namespace URI.
|
||||
if qualifiedName.contains(':'):
|
||||
if namespaceURI == nil or namespaceURI == "":
|
||||
raise newException(ENamespaceErr, "When qualifiedName contains a prefix namespaceURI cannot be nil")
|
||||
@@ -319,7 +317,7 @@ proc createElementNS*(doc: PDocument, namespaceURI: string, qualifiedName: strin
|
||||
raise newException(ENamespaceErr,
|
||||
"When the namespace prefix is \"xml\" namespaceURI has to be \"http://www.w3.org/XML/1998/namespace\"")
|
||||
|
||||
#Check if name contains illegal characters
|
||||
# Check if name contains illegal characters
|
||||
if illegalChars in namespaceURI or illegalChars in qualifiedName:
|
||||
raise newException(EInvalidCharacterErr, "Invalid character")
|
||||
|
||||
@@ -342,7 +340,7 @@ proc createElementNS*(doc: PDocument, namespaceURI: string, qualifiedName: strin
|
||||
return elNode
|
||||
|
||||
proc createProcessingInstruction*(doc: PDocument, target: string, data: string): PProcessingInstruction =
|
||||
##Creates a ProcessingInstruction node given the specified name and data strings.
|
||||
## Creates a ProcessingInstruction node given the specified name and data strings.
|
||||
|
||||
#Check if name contains illegal characters
|
||||
if illegalChars in target:
|
||||
@@ -356,7 +354,7 @@ proc createProcessingInstruction*(doc: PDocument, target: string, data: string):
|
||||
return PI
|
||||
|
||||
proc createTextNode*(doc: PDocument, data: string): PText = #Propably TextNode
|
||||
##Creates a Text node given the specified string.
|
||||
## Creates a Text node given the specified string.
|
||||
var txtNode: PText
|
||||
new(txtNode)
|
||||
txtNode.data = data
|
||||
@@ -371,8 +369,8 @@ discard """proc getElementById*(doc: PDocument, elementId: string): PElement =
|
||||
#TODO"""
|
||||
|
||||
proc getElementsByTagName*(doc: PDocument, tagName: string): seq[PNode] =
|
||||
##Returns a NodeList of all the Elements with a given tag name in
|
||||
##the order in which they are encountered in a preorder traversal of the Document tree.
|
||||
## Returns a NodeList of all the Elements with a given tag name in
|
||||
## the order in which they are encountered in a preorder traversal of the Document tree.
|
||||
var result: seq[PNode] = @[]
|
||||
if doc.FDocumentElement.FNodeName == tagName or tagName == "*":
|
||||
result.add(doc.FDocumentElement)
|
||||
@@ -381,8 +379,8 @@ proc getElementsByTagName*(doc: PDocument, tagName: string): seq[PNode] =
|
||||
return result
|
||||
|
||||
proc getElementsByTagNameNS*(doc: PDocument, namespaceURI: string, localName: string): seq[PNode] =
|
||||
##Returns a NodeList of all the Elements with a given localName and namespaceURI
|
||||
##in the order in which they are encountered in a preorder traversal of the Document tree.
|
||||
## Returns a NodeList of all the Elements with a given localName and namespaceURI
|
||||
## in the order in which they are encountered in a preorder traversal of the Document tree.
|
||||
var result: seq[PNode] = @[]
|
||||
if doc.FDocumentElement.FLocalName == localName or localName == "*":
|
||||
if doc.FDocumentElement.FNamespaceURI == namespaceURI or namespaceURI == "*":
|
||||
@@ -450,57 +448,76 @@ proc importNode*(doc: PDocument, importedNode: PNode, deep: bool): PNode =
|
||||
|
||||
# Node
|
||||
# Attributes
|
||||
proc Attributes*(n: PNode): seq[PAttr] =
|
||||
if n.attributes == nil: n.attributes = @[] # Initialize the sequence if it's nil
|
||||
return n.attributes
|
||||
|
||||
proc firstChild*(n: PNode): PNode =
  ## Returns the first entry of this node's ``childNodes``, or ``nil`` when
  ## the node has no children.
  if n.childNodes.len() <= 0:
    return nil
  return n.childNodes[0]
|
||||
|
||||
proc lastChild*(n: PNode): PNode =
  ## Returns the last entry of this node's ``childNodes``, or ``nil`` when
  ## the node has no children.
  var count = n.childNodes.len()
  if count <= 0:
    return nil
  result = n.childNodes[count - 1]
|
||||
|
||||
proc localName*(n: PNode): string =
|
||||
## Returns this node's local name
|
||||
|
||||
return n.FLocalName
|
||||
|
||||
proc namespaceURI*(n: PNode): string =
|
||||
## Returns this node's namespace URI
|
||||
|
||||
return n.FNamespaceURI
|
||||
|
||||
proc nextSibling*(n: PNode): PNode =
  ## Returns the node immediately following this node in its parent's
  ## ``childNodes``. Returns ``nil`` if this node has no parent, is the
  ## last child, or is not found among the parent's children.

  # A node that was never appended has no parent to search in.
  if n.FParentNode == nil or n.FParentNode.childNodes == nil:
    return nil

  var nLow: int = low(n.FParentNode.childNodes)
  var nHigh: int = high(n.FParentNode.childNodes)
  for i in nLow..nHigh:
    if n.FParentNode.childNodes[i] == n:
      # The last child has no successor; indexing i + 1 there would be
      # out of range.
      if i < nHigh:
        return n.FParentNode.childNodes[i + 1]
      return nil
  return nil
|
||||
|
||||
proc nodeName*(n: PNode): string =
|
||||
## Returns the name of this node
|
||||
|
||||
return n.FNodeName
|
||||
|
||||
proc nodeType*(n: PNode): int =
|
||||
## Returns the type of this node
|
||||
|
||||
return n.FNodeType
|
||||
|
||||
proc ownerDocument*(n: PNode): PDocument =
|
||||
## Returns the owner document of this node
|
||||
|
||||
return n.FOwnerDocument
|
||||
|
||||
proc parentNode*(n: PNode): PNode =
|
||||
## Returns the parent node of this node
|
||||
|
||||
return n.FParentNode
|
||||
|
||||
proc previousSibling*(n: PNode): PNode =
  ## Returns the node immediately preceding this node in its parent's
  ## ``childNodes``. Returns ``nil`` if this node has no parent, is the
  ## first child, or is not found among the parent's children.

  # A node that was never appended has no parent to search in.
  if n.FParentNode == nil or n.FParentNode.childNodes == nil:
    return nil

  var nLow: int = low(n.FParentNode.childNodes)
  var nHigh: int = high(n.FParentNode.childNodes)
  for i in nLow..nHigh:
    if n.FParentNode.childNodes[i] == n:
      # The first child has no predecessor; indexing i - 1 there would be
      # out of range.
      if i > nLow:
        return n.FParentNode.childNodes[i - 1]
      return nil
  return nil
|
||||
|
||||
proc `prefix=`*(n: var PNode, value: string) =
|
||||
## Modifies the prefix of this node
|
||||
|
||||
# Setter
|
||||
# Check if name contains illegal characters
|
||||
if illegalChars in value:
|
||||
@@ -532,8 +549,11 @@ proc appendChild*(n: PNode, newChild: PNode) =
|
||||
## Adds the node newChild to the end of the list of children of this node.
|
||||
## If the newChild is already in the tree, it is first removed.
|
||||
|
||||
# TODO - Check if n contains newChild
|
||||
# TODO - Exceptions
|
||||
# Check if n contains newChild
|
||||
if n.childNodes != nil:
|
||||
for i in low(n.childNodes)..high(n.childNodes):
|
||||
if n.childNodes[i] == newChild:
|
||||
raise newException(EHierarchyRequestErr, "The node to append is already in this nodes children.")
|
||||
|
||||
# Check if newChild is from this nodes document
|
||||
if n.FOwnerDocument != newChild.FOwnerDocument:
|
||||
@@ -542,6 +562,9 @@ proc appendChild*(n: PNode, newChild: PNode) =
|
||||
if n == newChild:
|
||||
raise newException(EHierarchyRequestErr, "You can't add a node into itself")
|
||||
|
||||
if n.nodeType in childlessObjects:
|
||||
raise newException(ENoModificationAllowedErr, "Cannot append children to a childless node")
|
||||
|
||||
if n.childNodes == nil: n.childNodes = @[]
|
||||
|
||||
newChild.FParentNode = n
|
||||
@@ -604,10 +627,43 @@ proc isSupported*(n: PNode, feature: string, version: string): bool =
|
||||
## feature and that feature is supported by this node.
|
||||
return n.FOwnerDocument.FImplementation.hasFeature(feature, version)
|
||||
|
||||
proc isEmpty(s: string): bool =
  ## Tells whether `s` is nil, has no characters at all, or consists solely
  ## of space characters (``' '``). Any other character, including other
  ## kinds of whitespace, makes the string non-empty.
  if s == "" or s == nil:
    return True
  result = True
  for ch in items(s):
    if ch != ' ':
      result = False
      break
|
||||
|
||||
proc normalize*(n: PNode) =
  ## Merges all separated TextNodes among the children of `n` together, and
  ## removes any empty TextNodes (empty as decided by ``isEmpty``).
  ##
  ## Only the direct children of `n` are processed. The first text node of
  ## every run of adjacent text nodes is kept and receives the text of the
  ## nodes that follow it.
  if n.childNodes == nil: return

  var newChildNodes: seq[PNode] = @[]
  var curTextNode: PNode = nil    # text node that collects the current run

  for i in low(n.childNodes)..high(n.childNodes):
    var child = n.childNodes[i]
    if child.nodeType == TextNode:
      # If the TextNode is empty, remove it; it does not end the current run
      if PText(child).data.isEmpty():
        continue

      if curTextNode == nil:
        # Start of a new run: keep this node right away so that a run at the
        # very end of the child list is not lost.
        curTextNode = child
        newChildNodes.add(curTextNode)
      else:
        PText(curTextNode).data.add(PText(child).data)
        curTextNode.nodeValue.add(PText(child).data)
    else:
      newChildNodes.add(child)
      curTextNode = nil

  n.childNodes = newChildNodes
|
||||
|
||||
proc removeChild*(n: PNode, oldChild: PNode): PNode =
|
||||
## Removes the child node indicated by ``oldChild`` from the list of children, and returns it.
|
||||
@@ -791,26 +847,32 @@ proc setNamedItemNS*(NList: var seq[PAttr], arg: PAttr): PAttr =
|
||||
NList[index] = arg
|
||||
return item # Return the replaced node
|
||||
|
||||
# TODO - Maybe implement a ChildlessNode!^
|
||||
|
||||
# CharacterData - Decided to implement this,
|
||||
# Didn't add the procedures, because you can just edit .data
|
||||
|
||||
# Attr
|
||||
# Attributes
|
||||
proc name*(a: PAttr): string =
  ## Gives back the name stored for the attribute `a`.
  result = a.FName
|
||||
|
||||
proc specified*(a: PAttr): bool =
  ## Tells whether the attribute `a` was specified in the original document
  ## (the value of its ``FSpecified`` field).
  result = a.FSpecified
|
||||
|
||||
proc ownerElement*(a: PAttr): PElement =
  ## Gives back the element that owns the attribute `a`.
  result = a.FOwnerElement
|
||||
|
||||
# Element
|
||||
# Attributes
|
||||
|
||||
proc tagName*(el: PElement): string =
  ## Gives back the tag name of the element `el`.
  result = el.FTagName
|
||||
|
||||
# Procedures
|
||||
@@ -960,11 +1022,29 @@ proc setAttributeNS*(el: PElement, namespaceURI, localName, value: string) =
|
||||
proc splitData*(TextNode: PText, offset: int): PText =
  ## Breaks this node into two nodes at the specified offset,
  ## keeping both in the tree as siblings.
  ##
  ## Afterwards `TextNode` holds the first `offset` characters of the
  ## original data and the returned node holds the remaining characters.
  ## If `TextNode` has a parent, the new node is inserted directly after
  ## `TextNode` in the parent's children.
  ##
  ## Raises ``EIndexSizeErr`` if `offset` is negative or greater than the
  ## length of the data.
  if offset < 0 or offset > TextNode.data.len():
    raise newException(EIndexSizeErr, "Index out of bounds")

  # Both halves have to be taken from the untouched data; ``copy`` treats its
  # last index as inclusive, hence ``offset - 1``.
  var left: string = ""
  if offset > 0: left = TextNode.data.copy(0, offset - 1)
  var right: string = TextNode.data.copy(offset)
  TextNode.data = left

  var newNode: PText = TextNode.FOwnerDocument.createTextNode(right)
  var parent = TextNode.FParentNode
  if parent != nil and parent.childNodes != nil:
    for i in low(parent.childNodes)..high(parent.childNodes):
      if parent.childNodes[i] == TextNode:
        newNode.FParentNode = parent
        # i + 1 places the second half behind the first one
        parent.childNodes.insert(newNode, i + 1)
        break
  return newNode
|
||||
|
||||
# TODO - need insert(seq[T])
|
||||
|
||||
# ProcessingInstruction
|
||||
proc target*(PI: PProcessingInstruction): string =
  ## Returns the Processing Instructions target
  return PI.FTarget
|
||||
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ import xmldom, os, streams, parsexml, strutils
|
||||
type
|
||||
#Parsing errors
|
||||
EMismatchedTag* = object of E_Base ## Raised when a tag is not properly closed
|
||||
EParserError* = object of E_Base ## Raised when an unexpected XML Parser event occurs
|
||||
|
||||
template newException(exceptn, message: expr): expr =
|
||||
block: # open a new scope
|
||||
@@ -52,6 +53,7 @@ proc parseElement(x: var TXmlParser, doc: var PDocument): PElement =
|
||||
|
||||
of xmlElementEnd:
|
||||
if x.elementName == n.nodeName:
|
||||
# n.normalize() # Remove any whitespace etc.
|
||||
return n
|
||||
else: #The wrong element is ended
|
||||
raise newException(EMismatchedTag, "Mismatched tag at line " &
|
||||
@@ -71,8 +73,12 @@ proc parseElement(x: var TXmlParser, doc: var PDocument): PElement =
|
||||
n.appendChild(doc.createComment(x.charData()))
|
||||
of xmlPI:
|
||||
n.appendChild(doc.createProcessingInstruction(x.PIName(), x.PIRest()))
|
||||
|
||||
of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
|
||||
# Unused 'events'
|
||||
|
||||
else:
|
||||
# echo(x.kind()) # XXX do nothing here!?
|
||||
raise newException(EParserError, "Unexpected XML Parser event")
|
||||
x.next()
|
||||
|
||||
raise newException(EMismatchedTag,
|
||||
@@ -99,9 +105,12 @@ proc loadXML*(path: string): PDocument =
|
||||
of xmlElementStart, xmlElementOpen:
|
||||
var el: PElement = parseElement(x, XmlDoc)
|
||||
XmlDoc = dom.createDocument(el)
|
||||
of xmlWhitespace, xmlElementClose, xmlEntity, xmlSpecial:
|
||||
# Unused 'events'
|
||||
else:
|
||||
# echo(x.kind())
|
||||
raise newException(EParserError, "Unexpected XML Parser event")
|
||||
|
||||
close(x)
|
||||
return XmlDoc
|
||||
|
||||
|
||||
|
||||
@@ -21,6 +21,10 @@
|
||||
##
|
||||
## <h1><a href="http://force7.de/nimrod">Nimrod</a></h1>
|
||||
##
|
||||
## **Deprecated since version 0.8.8.** Use the macro ``<>`` in xmltree
|
||||
## instead.
|
||||
|
||||
{.deprecated.}
|
||||
|
||||
import
|
||||
macros, strutils
|
||||
@@ -52,8 +56,8 @@ proc xmlCheckedTag*(e: PNimrodNode, tag: string,
|
||||
|
||||
# copy the attributes; when iterating over them these lists
|
||||
# will be modified, so that each attribute is only given one value
|
||||
var req = splitSeq(reqAttr)
|
||||
var opt = splitSeq(optAttr)
|
||||
var req = split(reqAttr)
|
||||
var opt = split(optAttr)
|
||||
result = newNimNode(nnkBracket, e)
|
||||
result.add(newStrLitNode("<"))
|
||||
result.add(newStrLitNode(tag))
|
||||
|
||||
231
lib/pure/xmltree.nim
Normal file
231
lib/pure/xmltree.nim
Normal file
@@ -0,0 +1,231 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2010 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## A simple XML tree. More efficient and simpler than the DOM.
|
||||
|
||||
import macros, strtabs
|
||||
|
||||
type
  PXmlNode* = ref TXmlNode ## an XML tree consists of ``PXmlNode``s.

  TXmlNodeKind* = enum  ## different kinds of ``PXmlNode``s
    xnText,             ## a text element
    xnElement,          ## an element with 0 or more children
    xnCData,            ## a CDATA node
    xnComment           ## an XML comment

  PXmlAttributes* = PStringTable ## an alias for a string to string mapping

  TXmlNode {.pure, final, acyclic.} = object
    case k: TXmlNodeKind          # the node's kind; selects the fields below
    of xnText, xnComment, xnCData:
      fText: string               # the text carried by the node
    of xnElement:
      fTag: string                # tag name
      s: seq[PXmlNode]            # child nodes
      fAttr: PXmlAttributes       # attributes; may be nil (see ``attrLen``)
    fClientData: int              ## for other clients
|
||||
|
||||
proc newXmlNode(kind: TXmlNodeKind): PXmlNode =
  ## allocates a fresh ``PXmlNode`` and marks it as being of `kind`.
  var node: PXmlNode
  new(node)
  node.k = kind
  return node
|
||||
|
||||
proc newElement*(tag: string): PXmlNode =
  ## creates a new ``PXmlNode`` of kind ``xnElement`` with the given `tag`
  ## and an empty list of children.
  var elem = newXmlNode(xnElement)
  elem.s = @[]
  elem.fTag = tag
  # the attributes are not set up here; init them lazily to save memory
  return elem
|
||||
|
||||
proc newText*(text: string): PXmlNode =
  ## builds a ``PXmlNode`` of kind ``xnText`` whose content is `text`.
  var node = newXmlNode(xnText)
  node.fText = text
  return node
|
||||
|
||||
proc newComment*(comment: string): PXmlNode =
  ## builds a ``PXmlNode`` of kind ``xnComment`` whose content is `comment`.
  var node = newXmlNode(xnComment)
  node.fText = comment
  return node
|
||||
|
||||
proc newCData*(cdata: string): PXmlNode =
  ## builds a ``PXmlNode`` of kind ``xnCData`` whose content is `cdata`.
  var node = newXmlNode(xnCData)
  node.fText = cdata
  return node
|
||||
|
||||
proc text*(n: PXmlNode): string {.inline.} =
  ## returns the text stored in `n`. `n` has to be a text, comment or CDATA
  ## node; this is checked with an assertion.
  assert n.k in {xnText, xnComment, xnCData}
  return n.fText
|
||||
|
||||
proc tag*(n: PXmlNode): string {.inline.} =
  ## returns the tag name of `n`. `n` has to be an ``xnElement`` node; this
  ## is checked with an assertion.
  assert n.k == xnElement
  return n.fTag
|
||||
|
||||
proc add*(father, son: PXmlNode) {.inline.} =
  ## appends `son` to the children of `father`.
  father.s.add(son)
|
||||
|
||||
proc len*(n: PXmlNode): int {.inline.} =
  ## returns the number of `n`'s children; a node that is not of kind
  ## ``xnElement`` reports 0.
  if n.k != xnElement: return 0
  return len(n.s)
|
||||
|
||||
proc kind*(n: PXmlNode): TXmlNodeKind {.inline.} =
  ## tells which kind of node `n` is.
  return n.k
|
||||
|
||||
proc `[]`* (n: PXmlNode, i: int): PXmlNode {.inline.} =
  ## gives access to the child of `n` at index `i`. `n` has to be an
  ## ``xnElement`` node; this is checked with an assertion.
  assert n.k == xnElement
  return n.s[i]
|
||||
|
||||
iterator items*(n: PXmlNode): PXmlNode {.inline.} =
  ## yields every child of `n` in order. `n` has to be an ``xnElement``
  ## node; this is checked with an assertion.
  assert n.k == xnElement
  var last = n.len - 1    # upper bound is fixed before the loop starts
  for idx in 0 .. last: yield n[idx]
|
||||
|
||||
proc attr*(n: PXmlNode): PXmlAttributes {.inline.} =
  ## returns the attribute table stored in `n`. `n` has to be an
  ## ``xnElement`` node; this is checked with an assertion.
  assert n.k == xnElement
  return n.fAttr
|
||||
|
||||
proc `attr=`*(n: PXmlNode, attr: PXmlAttributes) {.inline.} =
  ## replaces the attribute table of `n` with `attr`. `n` has to be an
  ## ``xnElement`` node; this is checked with an assertion.
  assert(n.k == xnElement)
  n.fAttr = attr
|
||||
|
||||
proc attrLen*(n: PXmlNode): int {.inline.} =
  ## counts the attributes of `n`; 0 if no attribute table has been set.
  ## `n` has to be an ``xnElement`` node; this is checked with an assertion.
  assert n.k == xnElement
  if isNil(n.fAttr): return 0
  return len(n.fAttr)
|
||||
|
||||
proc clientData*(n: PXmlNode): int {.inline.} =
  ## returns the client data stored in `n`. The client data field is used by
  ## the HTML parser and generator.
  return n.fClientData
|
||||
|
||||
proc `clientData=`*(n: PXmlNode, data: int) {.inline.} =
  ## stores `data` as the client data of `n`. The client data field is used
  ## by the HTML parser and generator.
  n.fClientData = data
|
||||
|
||||
proc addEscaped*(result: var string, s: string) =
  ## same as ``result.add(escape(s))``, but more efficient.
  ##
  ## ``<``, ``>``, ``&`` and ``"`` are replaced by their predefined XML
  ## entities; every other character is appended unchanged.
  for c in items(s):
    case c
    of '<': result.add("&lt;")
    of '>': result.add("&gt;")
    of '&': result.add("&amp;")
    of '"': result.add("&quot;")
    else: result.add(c)
|
||||
|
||||
proc escape*(s: string): string =
  ## escapes `s` for inclusion into an XML document.
  ## Escapes these characters:
  ##
  ## ------------    -------------------
  ## char            is converted to
  ## ------------    -------------------
  ##  ``<``          ``&lt;``
  ##  ``>``          ``&gt;``
  ##  ``&``          ``&amp;``
  ##  ``"``          ``&quot;``
  ## ------------    -------------------
  # Start from a string of length s.len that is then cut back to length 0
  # (presumably so that the buffer is already allocated) and append to it.
  result = newString(s.len)
  setLen(result, 0)
  addEscaped(result, s)
|
||||
|
||||
proc addIndent(result: var string, indent: int) =
  ## starts a new line in `result` and pads it with `indent` spaces.
  result.add("\n")
  var remaining = indent
  while remaining > 0:
    result.add(' ')
    dec(remaining)
|
||||
|
||||
proc add*(result: var string, n: PXmlNode, indent = 0, indWidth = 2) =
  ## appends the textual representation of `n` to `result`.
  ##
  ## Every child of an element is written on a line of its own, indented by
  ## `indWidth` more spaces than its parent, which itself is indented by
  ## `indent` spaces. An element without children is written as ``<tag />``.
  ## Text, comment content and attribute values are escaped; CDATA content
  ## is written as it is.
  case n.k
  of xnText:
    result.addEscaped(n.fText)
  of xnComment:
    result.add("<!-- ")
    result.addEscaped(n.fText)
    result.add(" -->")
  of xnCData:
    result.add("<![CDATA[")
    result.add(n.fText)
    result.add("]]>")
  of xnElement:
    result.add('<')
    result.add(n.fTag)
    if not isNil(n.fAttr):
      for key, val in pairs(n.fAttr):
        result.add(' ')
        result.add(key)
        result.add("=\"")
        result.addEscaped(val)
        result.add('"')
    if n.len == 0:
      result.add(" />")
    else:
      var inner = indent + indWidth
      result.add('>')
      for i in 0..n.len-1:
        result.addIndent(inner)
        result.add(n[i], inner, indWidth)
      # the closing tag goes back to the indentation of the opening tag
      result.addIndent(indent)
      result.add("</")
      result.add(n.fTag)
      result.add(">")
|
||||
|
||||
proc `$`*(n: PXmlNode): string =
  ## renders `n` as a string: an XML declaration line followed by the
  ## textual representation of `n`.
  var doc = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
  doc.add(n)
  return doc
|
||||
|
||||
proc newXmlTree*(tag: string, children: openArray[PXmlNode],
                 attributes: PXmlAttributes = nil): PXmlNode =
  ## builds an element node named `tag` that has the given `children` (in
  ## the same order) and `attributes`.
  var tree = newXmlNode(xnElement)
  tree.fTag = tag
  tree.fAttr = attributes
  tree.s = @[]
  for child in items(children): tree.s.add(child)
  return tree
|
||||
|
||||
proc xmlConstructor(e: PNimrodNode): PNimrodNode {.compileTime.} =
  ## translates the argument of the ``<>`` macro into a call of
  ## ``newXmlTree``. Use this procedure to define a new XML tag.
  ##
  ## For a call like ``tag(...)`` every ``name=value`` argument becomes an
  ## attribute and every other argument becomes a child; anything else is
  ## taken as a bare tag name.
  expectLen(e, 1)
  var a = e[0]
  if a.kind != nnkCall:
    return newCall("newXmlTree", toStrLit(a))

  var childList = newNimNode(nnkBracket, a)
  var tableCall = newCall("newStringTable", [])
  for i in 1..a.len-1:
    var arg = a[i]
    if arg.kind != nnkExprEqExpr:
      childList.add(arg)
    else:
      tableCall.add(toStrLit(arg[0]))
      tableCall.add(arg[1])

  result = newCall("newXmlTree", toStrLit(a[0]))
  result.add(childList)
  # the attribute table is only passed on if something was added to its call
  if tableCall.len > 1: result.add(tableCall)
|
||||
|
||||
macro `<>`*(x: expr): expr =
  ## Constructor macro for XML; the tree is built by ``xmlConstructor``.
  ## Example usage:
  ##
  ## .. code-block:: nimrod
  ##   <>a(href="http://force7.de/nimrod", "Nimrod rules.")
  ##
  ## Produces an XML tree for::
  ##
  ##   <a href="http://force7.de/nimrod">Nimrod rules.</a>
  ##
  return xmlConstructor(x)
|
||||
|
||||
|
||||
|
||||
52
lib/pure/xmltreeparser.nim
Normal file
52
lib/pure/xmltreeparser.nim
Normal file
@@ -0,0 +1,52 @@
|
||||
#
|
||||
#
|
||||
# Nimrod's Runtime Library
|
||||
# (c) Copyright 2010 Andreas Rumpf
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## This module parses an XML document and creates its XML tree representation.
|
||||
|
||||
import streams, parsexml, xmltree
|
||||
|
||||
|
||||
proc parse*(x: var TXmlParser, father: PXmlNode) =
  ## Not implemented yet: this procedure currently does nothing.
  # TODO: presumably meant to parse the nodes at the parser's current
  # position and add them to `father` -- confirm the intended behaviour.
  nil
|
||||
|
||||
|
||||
proc parseXml*(s: PStream, filename: string,
               errors: var seq[string]): PXmlNode =
  ## parses the XML from stream `s` and returns a ``PXmlNode``. Every
  ## parsing error that occurred is added to the `errors` sequence.
  ##
  ## NOTE: the tree construction is still incomplete: only comments are added
  ## to the returned node, all other parser events are skipped.
  var x: TXmlParser
  open(x, s, filename, {reportComments})

  # NOTE(review): the root is always named "html"; this looks like a
  # leftover from the HTML parser -- confirm before relying on it.
  result = newElement("html")
  while true:
    x.next()
    case x.kind
    of xmlWhitespace: nil # just skip it
    of xmlComment:
      result.add(newComment(x.charData))
    of xmlError:
      errors.add(errorMsg(x))
    of xmlEof:
      break # end of input: without this the loop would never terminate
    else:
      nil # TODO: elements, text, CDATA etc. are not handled yet
  close(x)
|
||||
|
||||
proc parseXml*(s: PStream): PXmlNode =
  ## parses the XML from stream `s` and returns a ``PXmlNode``. The parsing
  ## errors are collected in a local sequence that is thrown away, so all of
  ## them are ignored.
  var ignored: seq[string] = @[]
  return parseXml(s, "unknown_html_doc", ignored)
|
||||
|
||||
proc loadXml*(path: string, reportErrors = false): PXmlNode =
  ## Opens the file given by ``path`` for reading, parses its content as XML
  ## and returns the resulting ``PXmlNode``. If `reportErrors` is true,
  ## each parsing error is written out with ``echo``.
  ##
  ## Raises ``EIO`` if the file cannot be opened.
  var input = newFileStream(path, fmRead)
  if input == nil:
    raise newException(EIO, "Unable to read file: " & path)

  var messages: seq[string] = @[]
  result = parseXml(input, path, messages)
  if not reportErrors: return
  for m in items(messages): echo(m)
|
||||
|
||||
@@ -1257,7 +1257,9 @@ proc echo*[Ty](x: openarray[Ty]) {.magic: "Echo".}
|
||||
## equivalent to ``writeln(stdout, x); flush(stdout)``. BUT: This is
|
||||
## available for the ECMAScript target too!
|
||||
|
||||
template newException(exceptn, message: expr): expr =
|
||||
template newException*(exceptn, message: expr): expr =
|
||||
## creates an exception object of type "exceptn" and sets its ``msg`` field
|
||||
## to `message`. Returns the new exception object.
|
||||
block: # open a new scope
|
||||
var
|
||||
e: ref exceptn
|
||||
|
||||
@@ -26,6 +26,7 @@ Additions
|
||||
- Added ``system.cstringArrayToSeq``.
|
||||
- Added ``system.lines(f: TFile)`` iterator.
|
||||
- Added ``system.delete``, ``system.del`` and ``system.insert`` for sequences.
|
||||
- Exported ``system.newException`` template.
|
||||
- Added ``cgi.decodeData(data: string): tuple[key, value: string]``.
|
||||
- Added ``ropes`` module.
|
||||
- Added ``sockets`` module.
|
||||
@@ -36,6 +37,9 @@ Additions
|
||||
- Added ``unidecode`` module.
|
||||
- Added ``xmldom`` module.
|
||||
- Added ``xmldomparser`` module.
|
||||
- Added ``xmltree`` module.
|
||||
- Added ``xmltreeparser`` module.
|
||||
- Added ``htmlparser`` module.
|
||||
- Many wrappers now do not contain redundant name prefixes (like ``GTK_``,
|
||||
``lua``). The new wrappers are available in ``lib/newwrap``. Change
|
||||
your configuration file to use these.
|
||||
@@ -100,7 +104,7 @@ Changes affecting backwards compatibility
|
||||
- The compiler does not skip the linking step anymore even if no file
|
||||
has changed.
|
||||
- ``os.splitFile(".xyz")`` now returns ``("", ".xyz", "")`` instead of
|
||||
``("", "", ".xyz")``. Filenames starting with a dot are handled
|
||||
``("", "", ".xyz")``. So filenames starting with a dot are handled
|
||||
differently.
|
||||
- ``strutils.split(s: string, seps: set[char])`` never yields the empty string
|
||||
anymore. This behaviour is probably more appropriate for whitespace splitting.
|
||||
|
||||
Reference in New Issue
Block a user