Add parsing of empty attribs to htmlparser (#9559)

This commit is contained in:
rec
2018-10-30 06:58:39 +01:00
committed by Andreas Rumpf
parent 69c0a9c6fb
commit 9899c4525c
3 changed files with 23 additions and 3 deletions

View File

@@ -2014,7 +2014,8 @@ proc parseHtml*(s: Stream, filename: string,
## Parses the HTML from stream `s` and returns an ``XmlNode``. Every
## parsing error that occurs is added to the `errors` sequence.
var x: XmlParser
open(x, s, filename, {reportComments, reportWhitespace, allowUnquotedAttribs})
open(x, s, filename, {reportComments, reportWhitespace, allowUnquotedAttribs,
allowEmptyAttribs})
next(x)
# skip the DOCTYPE:
if x.kind == xmlSpecial: next(x)

View File

@@ -189,6 +189,7 @@ type
reportWhitespace, ## report whitespace
reportComments ## report comments
allowUnquotedAttribs ## allow unquoted attribute values (for HTML)
allowEmptyAttribs ## allow empty attributes (without explicit value)
XmlParser* = object of BaseLexer ## the parser object.
a, b, c: string
@@ -621,10 +622,15 @@ proc parseAttribute(my: var XmlParser) =
if my.a.len == 0:
markError(my, errGtExpected)
return
let startPos = my.bufpos
parseWhitespace(my, skip=true)
if my.buf[my.bufpos] != '=':
markError(my, errEqExpected)
if allowEmptyAttribs notin my.options or
(my.buf[my.bufpos] != '>' and my.bufpos == startPos):
markError(my, errEqExpected)
return
inc(my.bufpos)
parseWhitespace(my, skip=true)

View File

@@ -92,6 +92,8 @@ block t6154:
<p something= &#9;foo&#9;bar&#178; foo =bloo></p>
<p class="foo2" id="bar2"></p>
<p wrong= ></p>
<p data-foo data-bar="correct!" enabled ></p>
<p quux whatever></p>
</body>
</html>
"""
@@ -100,7 +102,7 @@ block t6154:
let html = parseHtml(newStringStream(foo), "statichtml", errors=errors)
doAssert "statichtml(11, 18) Error: attribute value expected" in errors
let ps = html.findAll("p")
doAssert ps.len == 5
doAssert ps.len == 7
doAssert ps[0].attrsLen == 2
doAssert ps[0].attr("class") == "foo"
@@ -123,3 +125,14 @@ block t6154:
doAssert ps[4].attrsLen == 1
doAssert ps[4].attr("wrong") == ""
doAssert ps[5].attrsLen == 3
doAssert ps[5].attr("data-foo") == ""
doAssert ps[5].attr("data-bar") == "correct!"
doAssert ps[5].attr("enabled") == ""
doassert ps[5].len == 0
doAssert ps[6].attrsLen == 2
doAssert ps[6].attr("quux") == ""
doAssert ps[6].attr("whatever") == ""
doassert ps[6].len == 0