mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-19 05:50:30 +00:00
bugfixes: htmlparser module
This commit is contained in:
@@ -136,6 +136,24 @@ proc find*(s: string, pattern: TRegEx, start = 0): int =
|
||||
## match, -1 is returned.
|
||||
var matches: array[0..maxSubpatterns-1, string]
|
||||
result = find(s, pattern, matches, start)
|
||||
|
||||
iterator findAll*(s: string, pattern: TRegEx, start = 0): string =
  ## Yields all matching captures of `pattern` in `s`, scanning from index
  ## `start`.
  ##
  ## For every match, each capture group is yielded in order until the first
  ## unset (nil) capture is reached. Only captures are yielded, so a pattern
  ## without capture groups yields nothing.
  var matches: array[0..MaxSubpatterns-1, string]
  var i = start
  # Never search from beyond the end of the string.
  while i <= s.len:
    var j = find(s, pattern, matches, i)
    if j < 0: break
    i = j
    for k in 0..MaxSubpatterns-1:
      if isNil(matches[k]): break
      # continue the next search after the text consumed by the captures
      inc(i, matches[k].len)
      yield matches[k]
    # Force progress: with no captures (or only empty ones) `i` still equals
    # the match start, and the same match would be found again forever.
    if i == j: inc(i)
|
||||
|
||||
proc findAll*(s: string, pattern: TRegEx, start = 0): seq[string] =
  ## Collects every capture yielded by the ``findAll`` iterator for `pattern`
  ## in `s` (scanning from `start`) into a sequence.
  ## An empty sequence (``@[]``) is returned when nothing matches.
  result = @[]
  for capture in findAll(s, pattern, start):
    add(result, capture)
|
||||
|
||||
template `=~` *(s: string, pattern: TRegEx): expr =
|
||||
## This calls ``match`` with an implicit declared ``matches`` array that
|
||||
|
||||
@@ -269,6 +269,7 @@ proc expected(x: var TXmlParser, n: PXmlNode): string =
|
||||
|
||||
proc untilElementEnd(x: var TXmlParser, result: PXmlNode,
|
||||
errors: var seq[string]) =
|
||||
# we parsed e.g. ``<br>`` and don't really expect a ``</br>``:
|
||||
if result.htmlTag in singleTags:
|
||||
if x.kind != xmlElementEnd or cmpIgnoreCase(x.elementName, result.tag) != 0:
|
||||
return
|
||||
@@ -277,10 +278,15 @@ proc untilElementEnd(x: var TXmlParser, result: PXmlNode,
|
||||
of xmlElementStart, xmlElementOpen:
|
||||
case result.htmlTag
|
||||
of tagLi, tagP, tagDt, tagDd, tagInput, tagOption:
|
||||
if htmlTag(x.elementName) notin InlineTags:
|
||||
# some tags are common to have no ``</end>``, like ``<li>``:
|
||||
# some tags are common to have no ``</end>``, like ``<li>``:
|
||||
if htmlTag(x.elementName) in {tagLi, tagP, tagDt, tagDd, tagInput,
|
||||
tagOption}:
|
||||
errors.add(expected(x, result))
|
||||
break
|
||||
when false:
|
||||
if htmlTag(x.elementName) notin InlineTags:
|
||||
errors.add(expected(x, result))
|
||||
break
|
||||
of tagTr, tagTd, tagTh, tagTfoot, tagThead:
|
||||
if htmlTag(x.elementName) in {tagTr, tagTd, tagTh, tagTfoot, tagThead}:
|
||||
errors.add(expected(x, result))
|
||||
@@ -367,9 +373,14 @@ proc parseHtml*(s: PStream, filename: string,
|
||||
# skip the DOCTYPE:
|
||||
if x.kind == xmlSpecial: next(x)
|
||||
result = parse(x, errors)
|
||||
while x.kind != xmlEof:
|
||||
if x.kind != xmlEof:
|
||||
errors.add(errorMsg(x, "EOF expected"))
|
||||
while x.kind != xmlEof:
|
||||
var oldPos = x.bufpos # little hack to see if we made any progress
|
||||
result.addNode(parse(x, errors))
|
||||
if x.bufpos == oldPos:
|
||||
# force progress!
|
||||
next(x)
|
||||
close(x)
|
||||
|
||||
proc parseHtml*(s: PStream): PXmlNode =
|
||||
|
||||
@@ -53,7 +53,7 @@ import
|
||||
|
||||
# the parser treats ``<br />`` as ``<br></br>``
|
||||
|
||||
## xmlElementCloseEnd, ## ``/>``
|
||||
# xmlElementCloseEnd, ## ``/>``
|
||||
|
||||
type
|
||||
TXmlEventKind* = enum ## enumeration of all events that may occur when parsing
|
||||
|
||||
5
todo.txt
5
todo.txt
@@ -1,3 +1,8 @@
|
||||
- fix HTML parser bug: otherbug.html!
|
||||
- pegs: words are only special for definitions!
|
||||
- pegs: findAll
|
||||
|
||||
|
||||
High priority (version 0.9.0)
|
||||
=============================
|
||||
|
||||
|
||||
16
web/news.txt
16
web/news.txt
@@ -2,6 +2,22 @@
|
||||
News
|
||||
====
|
||||
|
||||
2010-XX-XX Version 0.8.12 released
|
||||
==================================
|
||||
|
||||
Bugfixes
|
||||
--------
|
||||
|
||||
- Bugfix: ``httpclient`` correctly passes the path starting with ``/``.
|
||||
- Bugfixes for the ``htmlparser`` module.
|
||||
|
||||
|
||||
Additions
|
||||
---------
|
||||
|
||||
- Added ``re.findAll``.
|
||||
|
||||
|
||||
2010-10-20 Version 0.8.10 released
|
||||
==================================
|
||||
|
||||
|
||||
Reference in New Issue
Block a user