mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-28 17:04:41 +00:00
58 lines
1.7 KiB
Nim
58 lines
1.7 KiB
Nim
# Example program demonstrating the parsexml module.
# This program reads an HTML file and writes all the links it contains to stdout.
# Parse errors are printed and otherwise skipped; whitespace is ignored.

import os, streams, parsexml, strutils
|
|
|
|
proc `=?=` (a, b: string): bool =
  ## Custom comparison operator: yields true when ``a`` and ``b`` are equal
  ## according to ``cmpIgnoreCase``, i.e. when letter case is disregarded.
  result = cmpIgnoreCase(a, b) == 0
|
|
|
|
# Entry point: read the HTML file named on the command line, print each
# ``<a href="...">`` link together with its link text, and finish with the
# total number of links found.
if paramCount() < 1:
  quit("Usage: htmlrefs filename[.html]")

var links = 0 # count the number of links
# the ".html" extension is optional on the command line (see usage message)
let filename = addFileExt(paramStr(1), "html")
let s = newFileStream(filename, fmRead)
if s == nil: quit("cannot open the file " & filename)
var x: XmlParser
open(x, s, filename)
next(x) # get first event
block mainLoop:
  while true:
    case x.kind
    of xmlElementOpen:
      # the <a href = "xyz"> tag we are interested in always has an attribute,
      # thus we search for ``xmlElementOpen`` and not for ``xmlElementStart``
      if x.elementName =?= "a":
        x.next()
        # ``href`` is not necessarily the first attribute
        # (e.g. <a class="c" href="xyz">), so step over any attributes that
        # precede it; the loop stops on ``href`` or on the first
        # non-attribute event
        while x.kind == xmlAttribute and not (x.attrKey =?= "href"):
          x.next()
        if x.kind == xmlAttribute:
          # still on an attribute, so it must be the ``href`` we looked for
          let link = x.attrValue
          inc(links)
          # skip until we have an ``xmlElementClose`` event
          while true:
            x.next()
            case x.kind
            of xmlEof: break mainLoop # input ended inside the tag
            of xmlElementClose: break
            else: discard
          x.next() # skip ``xmlElementClose``
          # now we have the description for the ``a`` element
          var desc = ""
          while x.kind == xmlCharData:
            desc.add(x.charData)
            x.next()
          echo(desc & ": " & link)
        # otherwise the current event is left for the main loop to handle
      else:
        x.next()
    of xmlEof: break # end of file reached
    of xmlError:
      # report the problem and keep going with the next event
      echo(errorMsg(x))
      x.next()
    else: x.next() # skip other events

echo($links & " link(s) found!")
x.close()
|
|
|