|
|
|
|
@@ -1,7 +1,7 @@
|
|
|
|
|
#
|
|
|
|
|
#
|
|
|
|
|
# Nimrod's Runtime Library
|
|
|
|
|
# (c) Copyright 2010 Andreas Rumpf
|
|
|
|
|
# (c) Copyright 2013 Andreas Rumpf
|
|
|
|
|
#
|
|
|
|
|
# See the file "copying.txt", included in this
|
|
|
|
|
# distribution, for details about the copyright.
|
|
|
|
|
@@ -35,8 +35,12 @@ type
|
|
|
|
|
tagAddress, ## the HTML ``address`` element
|
|
|
|
|
tagApplet, ## the deprecated HTML ``applet`` element
|
|
|
|
|
tagArea, ## the HTML ``area`` element
|
|
|
|
|
tagArticle, ## the HTML ``article`` element
|
|
|
|
|
tagAside, ## the HTML ``aside`` element
|
|
|
|
|
tagAudio, ## the HTML ``audio`` element
|
|
|
|
|
tagB, ## the HTML ``b`` element
|
|
|
|
|
tagBase, ## the HTML ``base`` element
|
|
|
|
|
tagBdi, ## the HTML ``bdi`` element
|
|
|
|
|
tagBdo, ## the deprecated HTML ``bdo`` element
|
|
|
|
|
tagBasefont, ## the deprecated HTML ``basefont`` element
|
|
|
|
|
tagBig, ## the HTML ``big`` element
|
|
|
|
|
@@ -44,22 +48,31 @@ type
|
|
|
|
|
tagBody, ## the HTML ``body`` element
|
|
|
|
|
tagBr, ## the HTML ``br`` element
|
|
|
|
|
tagButton, ## the HTML ``button`` element
|
|
|
|
|
tagCanvas, ## the HTML ``canvas`` element
|
|
|
|
|
tagCaption, ## the HTML ``caption`` element
|
|
|
|
|
tagCenter, ## the deprecated HTML ``center`` element
|
|
|
|
|
tagCite, ## the HTML ``cite`` element
|
|
|
|
|
tagCode, ## the HTML ``code`` element
|
|
|
|
|
tagCol, ## the HTML ``col`` element
|
|
|
|
|
tagColgroup, ## the HTML ``colgroup`` element
|
|
|
|
|
tagCommand, ## the HTML ``command`` element
|
|
|
|
|
tagDatalist, ## the HTML ``datalist`` element
|
|
|
|
|
tagDd, ## the HTML ``dd`` element
|
|
|
|
|
tagDel, ## the HTML ``del`` element
|
|
|
|
|
tagDetails, ## the HTML ``details`` element
|
|
|
|
|
tagDfn, ## the HTML ``dfn`` element
|
|
|
|
|
tagDialog, ## the HTML ``dialog`` element
|
|
|
|
|
tagDiv, ## the HTML ``div`` element
|
|
|
|
|
tagDir, ## the deprecated HTML ``dir`` element
|
|
|
|
|
tagDl, ## the HTML ``dl`` element
|
|
|
|
|
tagDt, ## the HTML ``dt`` element
|
|
|
|
|
tagEm, ## the HTML ``em`` element
|
|
|
|
|
tagEmbed, ## the HTML ``embed`` element
|
|
|
|
|
tagFieldset, ## the HTML ``fieldset`` element
|
|
|
|
|
tagFigcaption, ## the HTML ``figcaption`` element
|
|
|
|
|
tagFigure, ## the HTML ``figure`` element
|
|
|
|
|
tagFont, ## the deprecated HTML ``font`` element
|
|
|
|
|
tagFooter, ## the HTML ``footer`` element
|
|
|
|
|
tagForm, ## the HTML ``form`` element
|
|
|
|
|
tagFrame, ## the HTML ``frame`` element
|
|
|
|
|
tagFrameset, ## the deprecated HTML ``frameset`` element
|
|
|
|
|
@@ -70,6 +83,8 @@ type
|
|
|
|
|
tagH5, ## the HTML ``h5`` element
|
|
|
|
|
tagH6, ## the HTML ``h6`` element
|
|
|
|
|
tagHead, ## the HTML ``head`` element
|
|
|
|
|
tagHeader, ## the HTML ``header`` element
|
|
|
|
|
tagHgroup, ## the HTML ``hgroup`` element
|
|
|
|
|
tagHtml, ## the HTML ``html`` element
|
|
|
|
|
tagHr, ## the HTML ``hr`` element
|
|
|
|
|
tagI, ## the HTML ``i`` element
|
|
|
|
|
@@ -79,13 +94,17 @@ type
|
|
|
|
|
tagIns, ## the HTML ``ins`` element
|
|
|
|
|
tagIsindex, ## the deprecated HTML ``isindex`` element
|
|
|
|
|
tagKbd, ## the HTML ``kbd`` element
|
|
|
|
|
tagKeygen, ## the HTML ``keygen`` element
|
|
|
|
|
tagLabel, ## the HTML ``label`` element
|
|
|
|
|
tagLegend, ## the HTML ``legend`` element
|
|
|
|
|
tagLi, ## the HTML ``li`` element
|
|
|
|
|
tagLink, ## the HTML ``link`` element
|
|
|
|
|
tagMap, ## the HTML ``map`` element
|
|
|
|
|
tagMark, ## the HTML ``mark`` element
|
|
|
|
|
tagMenu, ## the deprecated HTML ``menu`` element
|
|
|
|
|
tagMeta, ## the HTML ``meta`` element
|
|
|
|
|
tagMeter, ## the HTML ``meter`` element
|
|
|
|
|
tagNav, ## the HTML ``nav`` element
|
|
|
|
|
tagNobr, ## the deprecated HTML ``nobr`` element
|
|
|
|
|
tagNoframes, ## the deprecated HTML ``noframes`` element
|
|
|
|
|
tagNoscript, ## the HTML ``noscript`` element
|
|
|
|
|
@@ -93,20 +112,28 @@ type
|
|
|
|
|
tagOl, ## the HTML ``ol`` element
|
|
|
|
|
tagOptgroup, ## the HTML ``optgroup`` element
|
|
|
|
|
tagOption, ## the HTML ``option`` element
|
|
|
|
|
tagOutput, ## the HTML ``output`` element
|
|
|
|
|
tagP, ## the HTML ``p`` element
|
|
|
|
|
tagParam, ## the HTML ``param`` element
|
|
|
|
|
tagPre, ## the HTML ``pre`` element
|
|
|
|
|
tagProgress, ## the HTML ``progress`` element
|
|
|
|
|
tagQ, ## the HTML ``q`` element
|
|
|
|
|
tagRp, ## the HTML ``rp`` element
|
|
|
|
|
tagRt, ## the HTML ``rt`` element
|
|
|
|
|
tagRuby, ## the HTML ``ruby`` element
|
|
|
|
|
tagS, ## the deprecated HTML ``s`` element
|
|
|
|
|
tagSamp, ## the HTML ``samp`` element
|
|
|
|
|
tagScript, ## the HTML ``script`` element
|
|
|
|
|
tagSection, ## the HTML ``section`` element
|
|
|
|
|
tagSelect, ## the HTML ``select`` element
|
|
|
|
|
tagSmall, ## the HTML ``small`` element
|
|
|
|
|
tagSource, ## the HTML ``source`` element
|
|
|
|
|
tagSpan, ## the HTML ``span`` element
|
|
|
|
|
tagStrike, ## the deprecated HTML ``strike`` element
|
|
|
|
|
tagStrong, ## the HTML ``strong`` element
|
|
|
|
|
tagStyle, ## the HTML ``style`` element
|
|
|
|
|
tagSub, ## the HTML ``sub`` element
|
|
|
|
|
tagSummary, ## the HTML ``summary`` element
|
|
|
|
|
tagSup, ## the HTML ``sup`` element
|
|
|
|
|
tagTable, ## the HTML ``table`` element
|
|
|
|
|
tagTbody, ## the HTML ``tbody`` element
|
|
|
|
|
@@ -115,37 +142,46 @@ type
|
|
|
|
|
tagTfoot, ## the HTML ``tfoot`` element
|
|
|
|
|
tagTh, ## the HTML ``th`` element
|
|
|
|
|
tagThead, ## the HTML ``thead`` element
|
|
|
|
|
tagTime, ## the HTML ``time`` element
|
|
|
|
|
tagTitle, ## the HTML ``title`` element
|
|
|
|
|
tagTr, ## the HTML ``tr`` element
|
|
|
|
|
tagTrack, ## the HTML ``track`` element
|
|
|
|
|
tagTt, ## the HTML ``tt`` element
|
|
|
|
|
tagU, ## the deprecated HTML ``u`` element
|
|
|
|
|
tagUl, ## the HTML ``ul`` element
|
|
|
|
|
tagVar ## the HTML ``var`` element
|
|
|
|
|
tagVar, ## the HTML ``var`` element
|
|
|
|
|
tagVideo, ## the HTML ``video`` element
|
|
|
|
|
tagWbr ## the HTML ``wbr`` element
|
|
|
|
|
|
|
|
|
|
const
|
|
|
|
|
tagToStr* = [
|
|
|
|
|
"a", "abbr", "acronym", "address", "applet", "area",
|
|
|
|
|
"b", "base", "basefont", "bdo", "big", "blockquote", "body",
|
|
|
|
|
"br", "button", "caption", "center", "cite", "code",
|
|
|
|
|
"col", "colgroup", "dd", "del", "dfn", "div",
|
|
|
|
|
"dir", "dl", "dt", "em", "fieldset", "font",
|
|
|
|
|
"a", "abbr", "acronym", "address", "applet", "area", "article",
|
|
|
|
|
"aside", "audio",
|
|
|
|
|
"b", "base", "basefont", "bdi", "bdo", "big", "blockquote", "body",
|
|
|
|
|
"br", "button", "canvas", "caption", "center", "cite", "code",
|
|
|
|
|
"col", "colgroup", "command",
|
|
|
|
|
"datalist", "dd", "del", "details", "dfn", "dialog", "div",
|
|
|
|
|
"dir", "dl", "dt", "em", "embed", "fieldset",
|
|
|
|
|
"figcaption", "figure", "font", "footer",
|
|
|
|
|
"form", "frame", "frameset", "h1", "h2", "h3",
|
|
|
|
|
"h4", "h5", "h6", "head", "html", "hr",
|
|
|
|
|
"h4", "h5", "h6", "head", "header", "hgroup", "html", "hr",
|
|
|
|
|
"i", "iframe", "img", "input", "ins", "isindex",
|
|
|
|
|
"kbd", "label", "legend", "li", "link", "map",
|
|
|
|
|
"menu", "meta", "nobr", "noframes", "noscript", "object", "ol",
|
|
|
|
|
"optgroup", "option", "p", "param", "pre", "q",
|
|
|
|
|
"s", "samp", "script", "select", "small", "span",
|
|
|
|
|
"strike", "strong", "style", "sub", "sup", "table",
|
|
|
|
|
"tbody", "td", "textarea", "tfoot", "th", "thead",
|
|
|
|
|
"title", "tr", "tt", "u", "ul", "var"]
|
|
|
|
|
"kbd", "keygen", "label", "legend", "li", "link", "map", "mark",
|
|
|
|
|
"menu", "meta", "meter", "nav", "nobr", "noframes", "noscript",
|
|
|
|
|
"object", "ol",
|
|
|
|
|
"optgroup", "option", "output", "p", "param", "pre", "progress", "q",
|
|
|
|
|
"rp", "rt", "ruby", "s", "samp", "script", "section", "select", "small",
|
|
|
|
|
"source", "span", "strike", "strong", "style",
|
|
|
|
|
"sub", "summary", "sup", "table",
|
|
|
|
|
"tbody", "td", "textarea", "tfoot", "th", "thead", "time",
|
|
|
|
|
"title", "tr", "track", "tt", "u", "ul", "var", "video", "wbr"]
|
|
|
|
|
InlineTags* = {tagA, tagAbbr, tagAcronym, tagApplet, tagB, tagBasefont,
|
|
|
|
|
tagBdo, tagBig, tagBr, tagButton, tagCite, tagCode, tagDel, tagDfn,
|
|
|
|
|
tagEm, tagFont, tagI, tagImg, tagIns, tagInput, tagIframe, tagKbd,
|
|
|
|
|
tagLabel, tagMap, tagObject, tagQ, tagSamp, tagScript, tagSelect,
|
|
|
|
|
tagSmall, tagSpan, tagStrong, tagSub, tagSup, tagTextarea, tagTt,
|
|
|
|
|
tagVar, tagApplet, tagBasefont, tagFont, tagIframe, tagU, tagS,
|
|
|
|
|
tagStrike}
|
|
|
|
|
tagStrike, tagWbr}
|
|
|
|
|
BlockTags* = {tagAddress, tagBlockquote, tagCenter, tagDel, tagDir, tagDiv,
|
|
|
|
|
tagDl, tagFieldset, tagForm, tagH1, tagH2, tagH3, tagH4,
|
|
|
|
|
tagH5, tagH6, tagHr, tagIns, tagIsindex, tagMenu, tagNoframes, tagNoscript,
|
|
|
|
|
@@ -153,7 +189,7 @@ const
|
|
|
|
|
tagMenu, tagNoframes}
|
|
|
|
|
SingleTags* = {tagArea, tagBase, tagBasefont,
|
|
|
|
|
tagBr, tagCol, tagFrame, tagHr, tagImg, tagIsindex,
|
|
|
|
|
tagLink, tagMeta, tagParam}
|
|
|
|
|
tagLink, tagMeta, tagParam, tagWbr}
|
|
|
|
|
|
|
|
|
|
Entities = [
|
|
|
|
|
("nbsp", 0x00A0), ("iexcl", 0x00A1), ("cent", 0x00A2), ("pound", 0x00A3),
|
|
|
|
|
@@ -228,28 +264,149 @@ const
|
|
|
|
|
("loz", 0x25CA), ("spades", 0x2660), ("clubs", 0x2663),
|
|
|
|
|
("hearts", 0x2665), ("diams", 0x2666)]
|
|
|
|
|
|
|
|
|
|
proc binaryStrSearch(x: openarray[string], y: string): int =
  ## Binary search for `y` in `x`, comparing entries with `cmp`.
  ## Returns the index of a matching entry, or -1 when none is found.
  ## `x` must already be ordered consistently with `cmp` for the halving
  ## to be meaningful.
  ##
  ## XXX put this into the library somewhere!
  result = -1
  var lo = 0
  var hi = high(x)
  while lo <= hi:
    let pivot = (lo + hi) div 2
    let order = cmp(x[pivot], y)
    if order == 0:
      return pivot
    if order < 0:
      # entry at pivot sorts before `y`: continue in the upper half
      lo = pivot + 1
    else:
      # entry at pivot sorts after `y`: continue in the lower half
      hi = pivot - 1
|
|
|
|
|
proc allLower(s: string): bool =
  ## Reports whether every character of `s` lies in ``'a'..'z'``.
  ## Digits, upper-case letters and punctuation all make the result false;
  ## the empty string yields true because no character violates the rule.
  result = true
  for ch in s:
    if ch notin {'a'..'z'}:
      return false
|
|
|
|
|
|
|
|
|
|
proc toHtmlTag(s: string): THtmlTag =
  ## Maps the tag name `s` to its ``THtmlTag`` value by exact string match.
  ## Only the lower-case spellings listed below are recognised; any other
  ## string yields ``tagUnknown``.
  case s
  of "a": result = tagA
  of "abbr": result = tagAbbr
  of "acronym": result = tagAcronym
  of "address": result = tagAddress
  of "applet": result = tagApplet
  of "area": result = tagArea
  of "article": result = tagArticle
  of "aside": result = tagAside
  of "audio": result = tagAudio
  of "b": result = tagB
  of "base": result = tagBase
  of "basefont": result = tagBasefont
  of "bdi": result = tagBdi
  of "bdo": result = tagBdo
  of "big": result = tagBig
  of "blockquote": result = tagBlockquote
  of "body": result = tagBody
  of "br": result = tagBr
  of "button": result = tagButton
  of "canvas": result = tagCanvas
  of "caption": result = tagCaption
  of "center": result = tagCenter
  of "cite": result = tagCite
  of "code": result = tagCode
  of "col": result = tagCol
  of "colgroup": result = tagColgroup
  of "command": result = tagCommand
  of "datalist": result = tagDatalist
  of "dd": result = tagDd
  of "del": result = tagDel
  of "details": result = tagDetails
  of "dfn": result = tagDfn
  of "dialog": result = tagDialog
  of "div": result = tagDiv
  of "dir": result = tagDir
  of "dl": result = tagDl
  of "dt": result = tagDt
  of "em": result = tagEm
  of "embed": result = tagEmbed
  of "fieldset": result = tagFieldset
  of "figcaption": result = tagFigcaption
  of "figure": result = tagFigure
  of "font": result = tagFont
  of "footer": result = tagFooter
  of "form": result = tagForm
  of "frame": result = tagFrame
  of "frameset": result = tagFrameset
  of "h1": result = tagH1
  of "h2": result = tagH2
  of "h3": result = tagH3
  of "h4": result = tagH4
  of "h5": result = tagH5
  of "h6": result = tagH6
  of "head": result = tagHead
  of "header": result = tagHeader
  of "hgroup": result = tagHgroup
  of "html": result = tagHtml
  of "hr": result = tagHr
  of "i": result = tagI
  of "iframe": result = tagIframe
  of "img": result = tagImg
  of "input": result = tagInput
  of "ins": result = tagIns
  of "isindex": result = tagIsindex
  of "kbd": result = tagKbd
  of "keygen": result = tagKeygen
  of "label": result = tagLabel
  of "legend": result = tagLegend
  of "li": result = tagLi
  of "link": result = tagLink
  of "map": result = tagMap
  of "mark": result = tagMark
  of "menu": result = tagMenu
  of "meta": result = tagMeta
  of "meter": result = tagMeter
  of "nav": result = tagNav
  of "nobr": result = tagNobr
  of "noframes": result = tagNoframes
  of "noscript": result = tagNoscript
  of "object": result = tagObject
  of "ol": result = tagOl
  of "optgroup": result = tagOptgroup
  of "option": result = tagOption
  of "output": result = tagOutput
  of "p": result = tagP
  of "param": result = tagParam
  of "pre": result = tagPre
  of "progress": result = tagProgress
  of "q": result = tagQ
  of "rp": result = tagRp
  of "rt": result = tagRt
  of "ruby": result = tagRuby
  of "s": result = tagS
  of "samp": result = tagSamp
  of "script": result = tagScript
  of "section": result = tagSection
  of "select": result = tagSelect
  of "small": result = tagSmall
  of "source": result = tagSource
  of "span": result = tagSpan
  of "strike": result = tagStrike
  of "strong": result = tagStrong
  of "style": result = tagStyle
  of "sub": result = tagSub
  of "summary": result = tagSummary
  of "sup": result = tagSup
  of "table": result = tagTable
  of "tbody": result = tagTbody
  of "td": result = tagTd
  of "textarea": result = tagTextarea
  of "tfoot": result = tagTfoot
  of "th": result = tagTh
  of "thead": result = tagThead
  of "time": result = tagTime
  of "title": result = tagTitle
  of "tr": result = tagTr
  of "track": result = tagTrack
  of "tt": result = tagTt
  of "u": result = tagU
  of "ul": result = tagUl
  of "var": result = tagVar
  of "video": result = tagVideo
  of "wbr": result = tagWbr
  else: result = tagUnknown
|
|
|
|
|
|
|
|
|
|
proc htmlTag*(n: PXmlNode): THtmlTag =
  ## gets `n`'s tag as a ``THtmlTag``.
  ##
  ## The tag's ordinal is cached in ``n.clientData``. A cached value of 0 is
  ## treated as "not resolved yet", so `toHtmlTag` is consulted whenever the
  ## cache holds 0.
  if n.clientData == 0:
    # The former ``binaryStrSearch(tagToStr, n.tag)+1`` lookup was a dead
    # store: its value was overwritten by this assignment before being read.
    n.clientData = toHtmlTag(n.tag).ord
  result = THtmlTag(n.clientData)
|
|
|
|
|
|
|
|
|
|
proc htmlTag*(s: string): THtmlTag =
  ## converts `s` to a ``THtmlTag``. If `s` is no HTML tag, ``tagUnknown`` is
  ## returned.
  ##
  ## Matching is case-insensitive: `s` is lower-cased before the lookup, but
  ## the ``toLower`` call is skipped when `allLower` reports that `s` already
  ## consists solely of ``'a'..'z'``.
  # The earlier ``binaryStrSearch(tagToStr, s.toLower)+1`` assignment was a
  # dead store (overwritten below) that also lower-cased `s` a second time.
  let lowered = if allLower(s): s else: s.toLower
  result = toHtmlTag(lowered)
|
|
|
|
|
|
|
|
|
|
proc entityToUtf8*(entity: string): string =
|
|
|
|
|
## converts an HTML entity name like ``&Uuml;`` to its UTF-8 equivalent.
|
|
|
|
|
@@ -267,11 +424,13 @@ proc parse(x: var TXmlParser, errors: var seq[string]): PXmlNode
|
|
|
|
|
proc expected(x: var TXmlParser, n: PXmlNode): string =
  ## Builds the "closing tag expected" error text for the element `n`:
  ## the text ``</TAG> expected`` (with `n`'s tag name) is passed through
  ## `errorMsg` together with the parser `x`.
  let complaint = "</" & n.tag & "> expected"
  result = errorMsg(x, complaint)
|
|
|
|
|
|
|
|
|
|
# Shorthand for ``rawData(x)``; the parser code in this module calls it where
# the current token is an element start, open, or end tag.
template elemName(x: expr): expr = rawData(x)
|
|
|
|
|
|
|
|
|
|
proc untilElementEnd(x: var TXmlParser, result: PXmlNode,
|
|
|
|
|
errors: var seq[string]) =
|
|
|
|
|
# we parsed e.g. ``<br>`` and don't really expect a ``</br>``:
|
|
|
|
|
if result.htmlTag in singleTags:
|
|
|
|
|
if x.kind != xmlElementEnd or cmpIgnoreCase(x.elementName, result.tag) != 0:
|
|
|
|
|
if x.kind != xmlElementEnd or cmpIgnoreCase(x.elemName, result.tag) != 0:
|
|
|
|
|
return
|
|
|
|
|
while true:
|
|
|
|
|
case x.kind
|
|
|
|
|
@@ -279,28 +438,29 @@ proc untilElementEnd(x: var TXmlParser, result: PXmlNode,
|
|
|
|
|
case result.htmlTag
|
|
|
|
|
of tagLi, tagP, tagDt, tagDd, tagInput, tagOption:
|
|
|
|
|
# some tags are common to have no ``</end>``, like ``<li>``:
|
|
|
|
|
if htmlTag(x.elementName) in {tagLi, tagP, tagDt, tagDd, tagInput,
|
|
|
|
|
tagOption}:
|
|
|
|
|
if htmlTag(x.elemName) in {tagLi, tagP, tagDt, tagDd, tagInput,
|
|
|
|
|
tagOption}:
|
|
|
|
|
errors.add(expected(x, result))
|
|
|
|
|
break
|
|
|
|
|
when false:
|
|
|
|
|
if htmlTag(x.elementName) notin InlineTags:
|
|
|
|
|
errors.add(expected(x, result))
|
|
|
|
|
break
|
|
|
|
|
of tagTr, tagTd, tagTh, tagTfoot, tagThead:
|
|
|
|
|
if htmlTag(x.elementName) in {tagTr, tagTd, tagTh, tagTfoot, tagThead}:
|
|
|
|
|
of tagTd, tagTh, tagTfoot, tagThead:
|
|
|
|
|
if htmlTag(x.elemName) in {tagTr, tagTd, tagTh, tagTfoot, tagThead}:
|
|
|
|
|
errors.add(expected(x, result))
|
|
|
|
|
break
|
|
|
|
|
of tagTr:
|
|
|
|
|
if htmlTag(x.elemName) == tagTr:
|
|
|
|
|
errors.add(expected(x, result))
|
|
|
|
|
break
|
|
|
|
|
of tagOptgroup:
|
|
|
|
|
if htmlTag(x.elementName) in {tagOption, tagOptgroup}:
|
|
|
|
|
if htmlTag(x.elemName) in {tagOption, tagOptgroup}:
|
|
|
|
|
errors.add(expected(x, result))
|
|
|
|
|
break
|
|
|
|
|
else: nil
|
|
|
|
|
result.addNode(parse(x, errors))
|
|
|
|
|
of xmlElementEnd:
|
|
|
|
|
if cmpIgnoreCase(x.elementName, result.tag) == 0:
|
|
|
|
|
if cmpIgnoreCase(x.elemName, result.tag) == 0:
|
|
|
|
|
next(x)
|
|
|
|
|
else:
|
|
|
|
|
echo "5; expected: ", result.htmltag, " ", x.elemName
|
|
|
|
|
errors.add(expected(x, result))
|
|
|
|
|
# do not skip it here!
|
|
|
|
|
break
|
|
|
|
|
@@ -313,10 +473,10 @@ proc untilElementEnd(x: var TXmlParser, result: PXmlNode,
|
|
|
|
|
proc parse(x: var TXmlParser, errors: var seq[string]): PXmlNode =
|
|
|
|
|
case x.kind
|
|
|
|
|
of xmlComment:
|
|
|
|
|
result = newComment(x.charData)
|
|
|
|
|
result = newComment(x.rawData)
|
|
|
|
|
next(x)
|
|
|
|
|
of xmlCharData, xmlWhitespace:
|
|
|
|
|
result = newText(x.charData)
|
|
|
|
|
result = newText(x.rawData)
|
|
|
|
|
next(x)
|
|
|
|
|
of xmlPI, xmlSpecial:
|
|
|
|
|
# we just ignore processing instructions for now
|
|
|
|
|
@@ -325,19 +485,19 @@ proc parse(x: var TXmlParser, errors: var seq[string]): PXmlNode =
|
|
|
|
|
errors.add(errorMsg(x))
|
|
|
|
|
next(x)
|
|
|
|
|
of xmlElementStart:
|
|
|
|
|
result = newElement(x.elementName.toLower)
|
|
|
|
|
result = newElement(x.elemName.toLower)
|
|
|
|
|
next(x)
|
|
|
|
|
untilElementEnd(x, result, errors)
|
|
|
|
|
of xmlElementEnd:
|
|
|
|
|
errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName))
|
|
|
|
|
errors.add(errorMsg(x, "unexpected ending tag: " & x.elemName))
|
|
|
|
|
of xmlElementOpen:
|
|
|
|
|
result = newElement(x.elementName.toLower)
|
|
|
|
|
result = newElement(x.elemName.toLower)
|
|
|
|
|
next(x)
|
|
|
|
|
result.attrs = newStringTable()
|
|
|
|
|
while true:
|
|
|
|
|
case x.kind
|
|
|
|
|
of xmlAttribute:
|
|
|
|
|
result.attrs[x.attrKey] = x.attrValue
|
|
|
|
|
result.attrs[x.rawData] = x.rawData2
|
|
|
|
|
next(x)
|
|
|
|
|
of xmlElementClose:
|
|
|
|
|
next(x)
|
|
|
|
|
@@ -355,10 +515,10 @@ proc parse(x: var TXmlParser, errors: var seq[string]): PXmlNode =
|
|
|
|
|
errors.add(errorMsg(x, "<some_tag> expected"))
|
|
|
|
|
next(x)
|
|
|
|
|
of xmlCData:
|
|
|
|
|
result = newCData(x.charData)
|
|
|
|
|
result = newCData(x.rawData)
|
|
|
|
|
next(x)
|
|
|
|
|
of xmlEntity:
|
|
|
|
|
var u = entityToUtf8(x.entityName)
|
|
|
|
|
var u = entityToUtf8(x.rawData)
|
|
|
|
|
if u.len != 0: result = newText(u)
|
|
|
|
|
next(x)
|
|
|
|
|
of xmlEof: nil
|
|
|
|
|
@@ -372,15 +532,17 @@ proc parseHtml*(s: PStream, filename: string,
|
|
|
|
|
next(x)
|
|
|
|
|
# skip the DOCTYPE:
|
|
|
|
|
if x.kind == xmlSpecial: next(x)
|
|
|
|
|
result = parse(x, errors)
|
|
|
|
|
if x.kind != xmlEof:
|
|
|
|
|
errors.add(errorMsg(x, "EOF expected"))
|
|
|
|
|
|
|
|
|
|
result = newElement("document")
|
|
|
|
|
result.addNode(parse(x, errors))
|
|
|
|
|
#if x.kind != xmlEof:
|
|
|
|
|
# errors.add(errorMsg(x, "EOF expected"))
|
|
|
|
|
while x.kind != xmlEof:
|
|
|
|
|
var oldPos = x.bufpos # little hack to see if we made any progress
|
|
|
|
|
result.addNode(parse(x, errors))
|
|
|
|
|
if x.bufpos == oldPos:
|
|
|
|
|
if x.bufpos == oldPos:
|
|
|
|
|
# force progress!
|
|
|
|
|
next(x)
|
|
|
|
|
next(x)
|
|
|
|
|
close(x)
|
|
|
|
|
|
|
|
|
|
proc parseHtml*(s: PStream): PXmlNode =
|
|
|
|
|
@@ -400,22 +562,9 @@ proc loadHtml*(path: string, errors: var seq[string]): PXmlNode =
|
|
|
|
|
proc loadHtml*(path: string): PXmlNode =
  ## Loads and parses HTML from file specified by ``path``, and returns
  ## a ``PXmlNode``. All parsing errors are ignored.
  # Errors are collected into a throw-away list so the two-argument overload
  # can be reused; `errors` is declared exactly once (the duplicated
  # declaration was a redefinition in the same scope).
  var errors: seq[string] = @[]
  result = loadHtml(path, errors)
|
|
|
|
|
|
|
|
|
|
when false:
  # Compile-time disabled: `checkHtmlAux` is still an empty stub, so the
  # structure check below would only verify the root element.
  proc checkHtmlAux(n: PXmlNode, errors: var seq[string]) =
    nil

  proc checkHtmlStructure*(n: PXmlNode, errors: var seq[string]) =
    ## checks the HTML structure after parsing for other errors like
    ## a ``<h1>`` element within a ``<p>`` element.
    ##
    ## Adds ``<html> tag expected`` to `errors` when `n` is nil or is not an
    ## ``html`` element, then hands `n` to `checkHtmlAux`.
    let rootIsHtml = n != nil and n.htmlTag == tagHtml
    if not rootIsHtml:
      errors.add("<html> tag expected")
    checkHtmlAux(n, errors)
|
|
|
|
|
|
|
|
|
|
when isMainModule:
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
@@ -429,4 +578,3 @@ when isMainModule:
|
|
|
|
|
f.close()
|
|
|
|
|
else:
|
|
|
|
|
quit("cannot write test.txt")
|
|
|
|
|
|
|
|
|
|
|