fixes #2429

2026-07-28 03:16:32 +00:00 · 2015-07-01 15:47:15 +02:00
parent 13259c669d
commit 0d7e0e1b4f
2 changed files with 178 additions and 156 deletions
--- a/lib/pure/lexbase.nim
+++ b/lib/pure/lexbase.nim
@@ -34,37 +34,15 @@ type
    lineNumber*: int          ## the current line number
    sentinel: int
    lineStart: int            # index of last line start in buffer
-    fileOpened: bool
+    refillChars: set[char]

 {.deprecated: [TBaseLexer: BaseLexer].}

-proc open*(L: var BaseLexer, input: Stream, bufLen: int = 8192)
-  ## inits the BaseLexer with a stream to read from
-
-proc close*(L: var BaseLexer)
-  ## closes the base lexer. This closes `L`'s associated stream too.
-
-proc getCurrentLine*(L: BaseLexer, marker: bool = true): string
-  ## retrieves the current line.
-
-proc getColNumber*(L: BaseLexer, pos: int): int
-  ## retrieves the current column.
-
-proc handleCR*(L: var BaseLexer, pos: int): int
-  ## Call this if you scanned over '\c' in the buffer; it returns the the
-  ## position to continue the scanning from. `pos` must be the position
-  ## of the '\c'.
-proc handleLF*(L: var BaseLexer, pos: int): int
-  ## Call this if you scanned over '\L' in the buffer; it returns the the
-  ## position to continue the scanning from. `pos` must be the position
-  ## of the '\L'.
-
-# implementation
-
 const
  chrSize = sizeof(char)

-proc close(L: var BaseLexer) =
+proc close*(L: var BaseLexer) =
+  ## closes the base lexer. This closes `L`'s associated stream too.
  dealloc(L.buf)
  close(L.input)

@@ -80,7 +58,7 @@ proc fillBuffer(L: var BaseLexer) =
  toCopy = L.bufLen - L.sentinel - 1
  assert(toCopy >= 0)
  if toCopy > 0:
-    moveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize) 
+    moveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize)
    # "moveMem" handles overlapping regions
  charsRead = readData(L.input, addr(L.buf[toCopy]),
                       (L.sentinel + 1) * chrSize) div chrSize
@@ -93,7 +71,7 @@ proc fillBuffer(L: var BaseLexer) =
    dec(s)                    # BUGFIX (valgrind)
    while true:
      assert(s < L.bufLen)
-      while (s >= 0) and not (L.buf[s] in NewLines): dec(s)
+      while s >= 0 and L.buf[s] notin L.refillChars: dec(s)
      if s >= 0:
        # we found an appropriate character for a sentinel:
        L.sentinel = s
@@ -121,31 +99,46 @@ proc fillBaseLexer(L: var BaseLexer, pos: int): int =
    fillBuffer(L)
    L.bufpos = 0              # XXX: is this really correct?
    result = 0
-  L.lineStart = result

-proc handleCR(L: var BaseLexer, pos: int): int =
+proc handleCR*(L: var BaseLexer, pos: int): int =
+  ## Call this if you scanned over '\c' in the buffer; it returns the
+  ## position to continue the scanning from. `pos` must be the position
+  ## of the '\c'.
  assert(L.buf[pos] == '\c')
  inc(L.lineNumber)
  result = fillBaseLexer(L, pos)
  if L.buf[result] == '\L':
    result = fillBaseLexer(L, result)
+  L.lineStart = result

-proc handleLF(L: var BaseLexer, pos: int): int =
+proc handleLF*(L: var BaseLexer, pos: int): int =
+  ## Call this if you scanned over '\L' in the buffer; it returns the
+  ## position to continue the scanning from. `pos` must be the position
+  ## of the '\L'.
  assert(L.buf[pos] == '\L')
  inc(L.lineNumber)
  result = fillBaseLexer(L, pos) #L.lastNL := result-1; // BUGFIX: was: result;
+  L.lineStart = result
+
+proc handleRefillChar*(L: var BaseLexer, pos: int): int =
+  ## Call this for a `refillChars` char at `pos`; returns the pos to continue from.
+  assert(L.buf[pos] in L.refillChars)
+  result = fillBaseLexer(L, pos) # unlike handleCR/handleLF: no line bookkeeping here

 proc skipUtf8Bom(L: var BaseLexer) =
  if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'):
    inc(L.bufpos, 3)
    inc(L.lineStart, 3)

-proc open(L: var BaseLexer, input: Stream, bufLen: int = 8192) =
+proc open*(L: var BaseLexer, input: Stream, bufLen: int = 8192;
+           refillChars: set[char] = NewLines) =
+  ## inits the BaseLexer with a stream to read from.
  assert(bufLen > 0)
  assert(input != nil)
  L.input = input
  L.bufpos = 0
  L.bufLen = bufLen
+  L.refillChars = refillChars
  L.buf = cast[cstring](alloc(bufLen * chrSize))
  L.sentinel = bufLen - 1
  L.lineStart = 0
@@ -153,10 +146,12 @@ proc open(L: var BaseLexer, input: Stream, bufLen: int = 8192) =
  fillBuffer(L)
  skipUtf8Bom(L)

-proc getColNumber(L: BaseLexer, pos: int): int =
+proc getColNumber*(L: BaseLexer, pos: int): int =
+  ## retrieves the current column.
  result = abs(pos - L.lineStart)

-proc getCurrentLine(L: BaseLexer, marker: bool = true): string =
+proc getCurrentLine*(L: BaseLexer, marker: bool = true): string =
+  ## retrieves the current line.
  var i: int
  result = ""
  i = L.lineStart
@@ -166,4 +161,3 @@ proc getCurrentLine(L: BaseLexer, marker: bool = true): string =
  add(result, "\n")
  if marker:
    add(result, spaces(getColNumber(L, L.bufpos)) & "^\n")
-
--- a/lib/pure/parsexml.nim
+++ b/lib/pure/parsexml.nim
@@ -8,19 +8,19 @@
 #

 ## This module implements a simple high performance `XML`:idx: / `HTML`:idx:
-## parser. 
+## parser.
 ## The only encoding that is supported is UTF-8. The parser has been designed
-## to be somewhat error correcting, so that even most "wild HTML" found on the 
+## to be somewhat error correcting, so that even most "wild HTML" found on the
 ## web can be parsed with it. **Note:** This parser does not check that each
-## ``<tag>`` has a corresponding ``</tag>``! These checks have do be 
-## implemented by the client code for various reasons: 
+## ``<tag>`` has a corresponding ``</tag>``! These checks have to be
+## implemented by the client code for various reasons:
 ##
 ## * Old HTML contains tags that have no end tag: ``<br>`` for example.
 ## * HTML tags are case insensitive, XML tags are case sensitive. Since this
 ##   library can parse both, only the client knows which comparison is to be
 ##   used.
 ## * Thus the checks would have been very difficult to implement properly with
-##   little benefit, especially since they are simple to implement in the 
+##   little benefit, especially since they are simple to implement in the
 ##   client. The client should use the `errorMsgExpected` proc to generate
 ##   a nice error message that fits the other error messages this library
 ##   creates.
@@ -29,7 +29,7 @@
 ## Example 1: Retrieve HTML title
 ## ==============================
 ##
-## The file ``examples/htmltitle.nim`` demonstrates how to use the 
+## The file ``examples/htmltitle.nim`` demonstrates how to use the
 ## XML parser to accomplish a simple task: To determine the title of an HTML
 ## document.
 ##
@@ -40,22 +40,22 @@
 ## Example 2: Retrieve all HTML links
 ## ==================================
 ##
-## The file ``examples/htmlrefs.nim`` demonstrates how to use the 
-## XML parser to accomplish another simple task: To determine all the links 
+## The file ``examples/htmlrefs.nim`` demonstrates how to use the
+## XML parser to accomplish another simple task: To determine all the links
 ## an HTML document contains.
 ##
 ## .. code-block:: nim
 ##     :file: examples/htmlrefs.nim
 ##

-import 
+import
  hashes, strutils, lexbase, streams, unicode

 # the parser treats ``<br />`` as ``<br></br>``

-#  xmlElementCloseEnd, ## ``/>`` 
+#  xmlElementCloseEnd, ## ``/>``

-type 
+type
  XmlEventKind* = enum ## enumation of all events that may occur when parsing
    xmlError,           ## an error occurred during parsing
    xmlEof,             ## end of file reached
@@ -65,13 +65,13 @@ type
    xmlPI,              ## processing instruction (``<?name something ?>``)
    xmlElementStart,    ## ``<elem>``
    xmlElementEnd,      ## ``</elem>``
-    xmlElementOpen,     ## ``<elem 
+    xmlElementOpen,     ## ``<elem
    xmlAttribute,       ## ``key = "value"`` pair
-    xmlElementClose,    ## ``>`` 
+    xmlElementClose,    ## ``>``
    xmlCData,           ## ``<![CDATA[`` ... data ... ``]]>``
    xmlEntity,          ## &entity;
    xmlSpecial          ## ``<! ... data ... >``
-    
+
  XmlErrorKind* = enum       ## enumeration that lists all errors that can occur
    errNone,                 ## no error
    errEndOfCDataExpected,   ## ``]]>`` expected
@@ -82,8 +82,8 @@ type
    errEqExpected,           ## ``=`` expected
    errQuoteExpected,        ## ``"`` or ``'`` expected
    errEndOfCommentExpected  ## ``-->`` expected
-    
-  ParserState = enum 
+
+  ParserState = enum
    stateStart, stateNormal, stateAttr, stateEmptyElementTag, stateError

  XmlParseOption* = enum  ## options for the XML parser
@@ -121,8 +121,8 @@ proc open*(my: var XmlParser, input: Stream, filename: string,
  ## the `options` parameter: If `options` contains ``reportWhitespace``
  ## a whitespace token is reported as an ``xmlWhitespace`` event.
  ## If `options` contains ``reportComments`` a comment token is reported as an
-  ## ``xmlComment`` event. 
-  lexbase.open(my, input)
+  ## ``xmlComment`` event.
+  lexbase.open(my, input, 8192, {'\c', '\L', '/'})
  my.filename = filename
  my.state = stateStart
  my.kind = xmlError
@@ -130,24 +130,24 @@ proc open*(my: var XmlParser, input: Stream, filename: string,
  my.b = ""
  my.c = nil
  my.options = options
-  
-proc close*(my: var XmlParser) {.inline.} = 
+
+proc close*(my: var XmlParser) {.inline.} =
  ## closes the parser `my` and its associated input stream.
  lexbase.close(my)

-proc kind*(my: XmlParser): XmlEventKind {.inline.} = 
+proc kind*(my: XmlParser): XmlEventKind {.inline.} =
  ## returns the current event type for the XML parser
  return my.kind

 template charData*(my: XmlParser): string =
-  ## returns the character data for the events: ``xmlCharData``, 
+  ## returns the character data for the events: ``xmlCharData``,
  ## ``xmlWhitespace``, ``xmlComment``, ``xmlCData``, ``xmlSpecial``
-  assert(my.kind in {xmlCharData, xmlWhitespace, xmlComment, xmlCData, 
+  assert(my.kind in {xmlCharData, xmlWhitespace, xmlComment, xmlCData,
                     xmlSpecial})
  my.a

 template elementName*(my: XmlParser): string =
-  ## returns the element name for the events: ``xmlElementStart``, 
+  ## returns the element name for the events: ``xmlElementStart``,
  ## ``xmlElementEnd``, ``xmlElementOpen``
  assert(my.kind in {xmlElementStart, xmlElementEnd, xmlElementOpen})
  my.a
@@ -156,12 +156,12 @@ template entityName*(my: XmlParser): string =
  ## returns the entity name for the event: ``xmlEntity``
  assert(my.kind == xmlEntity)
  my.a
-  
+
 template attrKey*(my: XmlParser): string =
  ## returns the attribute key for the event ``xmlAttribute``
  assert(my.kind == xmlAttribute)
  my.a
-  
+
 template attrValue*(my: XmlParser): string =
  ## returns the attribute value for the event ``xmlAttribute``
  assert(my.kind == xmlAttribute)
@@ -187,110 +187,118 @@ proc rawData2*(my: XmlParser): string {.inline.} =
  ## This is only used for speed hacks.
  shallowCopy(result, my.b)

-proc getColumn*(my: XmlParser): int {.inline.} = 
+proc getColumn*(my: XmlParser): int {.inline.} =
  ## get the current column the parser has arrived at.
  result = getColNumber(my, my.bufpos)

-proc getLine*(my: XmlParser): int {.inline.} = 
+proc getLine*(my: XmlParser): int {.inline.} =
  ## get the current line the parser has arrived at.
  result = my.lineNumber

-proc getFilename*(my: XmlParser): string {.inline.} = 
+proc getFilename*(my: XmlParser): string {.inline.} =
  ## get the filename of the file that the parser processes.
  result = my.filename
-  
-proc errorMsg*(my: XmlParser): string = 
+
+proc errorMsg*(my: XmlParser): string =
  ## returns a helpful error message for the event ``xmlError``
  assert(my.kind == xmlError)
  result = "$1($2, $3) Error: $4" % [
    my.filename, $getLine(my), $getColumn(my), errorMessages[my.err]]

-proc errorMsgExpected*(my: XmlParser, tag: string): string = 
+proc errorMsgExpected*(my: XmlParser, tag: string): string =
  ## returns an error message "<tag> expected" in the same format as the
-  ## other error messages 
+  ## other error messages
  result = "$1($2, $3) Error: $4" % [
    my.filename, $getLine(my), $getColumn(my), "<$1> expected" % tag]

-proc errorMsg*(my: XmlParser, msg: string): string = 
+proc errorMsg*(my: XmlParser, msg: string): string =
  ## returns an error message with text `msg` in the same format as the
-  ## other error messages 
+  ## other error messages
  result = "$1($2, $3) Error: $4" % [
    my.filename, $getLine(my), $getColumn(my), msg]
-    
-proc markError(my: var XmlParser, kind: XmlErrorKind) {.inline.} = 
+
+proc markError(my: var XmlParser, kind: XmlErrorKind) {.inline.} =
  my.err = kind
  my.state = stateError

-proc parseCDATA(my: var XmlParser) = 
+proc parseCDATA(my: var XmlParser) =
  var pos = my.bufpos + len("<![CDATA[")
  var buf = my.buf
  while true:
-    case buf[pos] 
+    case buf[pos]
    of ']':
      if buf[pos+1] == ']' and buf[pos+2] == '>':
        inc(pos, 3)
        break
      add(my.a, ']')
      inc(pos)
-    of '\0': 
+    of '\0':
      markError(my, errEndOfCDataExpected)
      break
-    of '\c': 
+    of '\c':
      pos = lexbase.handleCR(my, pos)
      buf = my.buf
      add(my.a, '\L')
-    of '\L': 
+    of '\L':
      pos = lexbase.handleLF(my, pos)
      buf = my.buf
      add(my.a, '\L')
+    of '/':
+      pos = lexbase.handleRefillChar(my, pos)
+      buf = my.buf
+      add(my.a, '/')
    else:
      add(my.a, buf[pos])
-      inc(pos)    
+      inc(pos)
  my.bufpos = pos # store back
  my.kind = xmlCData

-proc parseComment(my: var XmlParser) = 
+proc parseComment(my: var XmlParser) =
  var pos = my.bufpos + len("<!--")
  var buf = my.buf
  while true:
-    case buf[pos] 
+    case buf[pos]
    of '-':
      if buf[pos+1] == '-' and buf[pos+2] == '>':
        inc(pos, 3)
        break
      if my.options.contains(reportComments): add(my.a, '-')
      inc(pos)
-    of '\0': 
+    of '\0':
      markError(my, errEndOfCommentExpected)
      break
-    of '\c': 
+    of '\c':
      pos = lexbase.handleCR(my, pos)
      buf = my.buf
      if my.options.contains(reportComments): add(my.a, '\L')
-    of '\L': 
+    of '\L':
      pos = lexbase.handleLF(my, pos)
      buf = my.buf
      if my.options.contains(reportComments): add(my.a, '\L')
+    of '/':
+      pos = lexbase.handleRefillChar(my, pos)
+      buf = my.buf
+      if my.options.contains(reportComments): add(my.a, '/')
    else:
      if my.options.contains(reportComments): add(my.a, buf[pos])
      inc(pos)
  my.bufpos = pos
  my.kind = xmlComment

-proc parseWhitespace(my: var XmlParser, skip=false) = 
+proc parseWhitespace(my: var XmlParser, skip=false) =
  var pos = my.bufpos
  var buf = my.buf
-  while true: 
+  while true:
    case buf[pos]
-    of ' ', '\t': 
+    of ' ', '\t':
      if not skip: add(my.a, buf[pos])
      inc(pos)
-    of '\c':  
+    of '\c':
      # the specification says that CR-LF, CR are to be transformed to LF
      pos = lexbase.handleCR(my, pos)
      buf = my.buf
      if not skip: add(my.a, '\L')
-    of '\L': 
+    of '\L':
      pos = lexbase.handleLF(my, pos)
      buf = my.buf
      if not skip: add(my.a, '\L')
@@ -302,10 +310,10 @@ const
  NameStartChar = {'A'..'Z', 'a'..'z', '_', ':', '\128'..'\255'}
  NameChar = {'A'..'Z', 'a'..'z', '0'..'9', '.', '-', '_', ':', '\128'..'\255'}

-proc parseName(my: var XmlParser, dest: var string) = 
+proc parseName(my: var XmlParser, dest: var string) =
  var pos = my.bufpos
  var buf = my.buf
-  if buf[pos] in NameStartChar: 
+  if buf[pos] in NameStartChar:
    while true:
      add(dest, buf[pos])
      inc(pos)
@@ -314,14 +322,14 @@ proc parseName(my: var XmlParser, dest: var string) =
  else:
    markError(my, errNameExpected)

-proc parseEntity(my: var XmlParser, dest: var string) = 
+proc parseEntity(my: var XmlParser, dest: var string) =
  var pos = my.bufpos+1
  var buf = my.buf
  my.kind = xmlCharData
  if buf[pos] == '#':
    var r: int
    inc(pos)
-    if buf[pos] == 'x': 
+    if buf[pos] == 'x':
      inc(pos)
      while true:
        case buf[pos]
@@ -331,7 +339,7 @@ proc parseEntity(my: var XmlParser, dest: var string) =
        else: break
        inc(pos)
    else:
-      while buf[pos] in {'0'..'9'}: 
+      while buf[pos] in {'0'..'9'}:
        r = r * 10 + (ord(buf[pos]) - ord('0'))
        inc(pos)
    add(dest, toUTF8(Rune(r)))
@@ -345,11 +353,11 @@ proc parseEntity(my: var XmlParser, dest: var string) =
      buf[pos+3] == ';':
    add(dest, '&')
    inc(pos, 3)
-  elif buf[pos] == 'a' and buf[pos+1] == 'p' and buf[pos+2] == 'o' and 
+  elif buf[pos] == 'a' and buf[pos+1] == 'p' and buf[pos+2] == 'o' and
      buf[pos+3] == 's' and buf[pos+4] == ';':
    add(dest, '\'')
    inc(pos, 4)
-  elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and 
+  elif buf[pos] == 'q' and buf[pos+1] == 'u' and buf[pos+2] == 'o' and
      buf[pos+3] == 't' and buf[pos+4] == ';':
    add(dest, '"')
    inc(pos, 4)
@@ -357,23 +365,23 @@ proc parseEntity(my: var XmlParser, dest: var string) =
    my.bufpos = pos
    parseName(my, dest)
    pos = my.bufpos
-    if my.err != errNameExpected: 
+    if my.err != errNameExpected:
      my.kind = xmlEntity
    else:
      add(dest, '&')
-  if buf[pos] == ';': 
+  if buf[pos] == ';':
    inc(pos)
  else:
    markError(my, errSemicolonExpected)
  my.bufpos = pos

-proc parsePI(my: var XmlParser) = 
+proc parsePI(my: var XmlParser) =
  inc(my.bufpos, "<?".len)
  parseName(my, my.a)
  var pos = my.bufpos
  var buf = my.buf
  setLen(my.b, 0)
-  while true: 
+  while true:
    case buf[pos]
    of '\0':
      markError(my, errQmGtExpected)
@@ -387,29 +395,33 @@ proc parsePI(my: var XmlParser) =
    of '\c':
      # the specification says that CR-LF, CR are to be transformed to LF
      pos = lexbase.handleCR(my, pos)
-      buf = my.buf      
+      buf = my.buf
      add(my.b, '\L')
-    of '\L': 
+    of '\L':
      pos = lexbase.handleLF(my, pos)
      buf = my.buf
      add(my.b, '\L')
+    of '/':
+      pos = lexbase.handleRefillChar(my, pos)
+      buf = my.buf
+      add(my.b, '/')
    else:
      add(my.b, buf[pos])
      inc(pos)
  my.bufpos = pos
  my.kind = xmlPI

-proc parseSpecial(my: var XmlParser) = 
+proc parseSpecial(my: var XmlParser) =
  # things that start with <!
  var pos = my.bufpos + 2
  var buf = my.buf
  var opentags = 0
-  while true: 
+  while true:
    case buf[pos]
    of '\0':
      markError(my, errGtExpected)
      break
-    of '<': 
+    of '<':
      inc(opentags)
      inc(pos)
      add(my.a, '<')
@@ -420,47 +432,55 @@ proc parseSpecial(my: var XmlParser) =
      dec(opentags)
      inc(pos)
      add(my.a, '>')
-    of '\c':  
+    of '\c':
      pos = lexbase.handleCR(my, pos)
      buf = my.buf
      add(my.a, '\L')
-    of '\L': 
+    of '\L':
      pos = lexbase.handleLF(my, pos)
      buf = my.buf
      add(my.a, '\L')
+    of '/':
+      pos = lexbase.handleRefillChar(my, pos)
+      buf = my.buf
+      add(my.a, '/') # xmlSpecial text accumulates in my.a (was wrongly my.b)
    else:
      add(my.a, buf[pos])
      inc(pos)
  my.bufpos = pos
  my.kind = xmlSpecial

-proc parseTag(my: var XmlParser) = 
+proc parseTag(my: var XmlParser) =
  inc(my.bufpos)
  parseName(my, my.a)
  # if we have no name, do not interpret the '<':
-  if my.a.len == 0: 
+  if my.a.len == 0:
    my.kind = xmlCharData
    add(my.a, '<')
    return
  parseWhitespace(my, skip=true)
-  if my.buf[my.bufpos] in NameStartChar: 
+  if my.buf[my.bufpos] in NameStartChar:
    # an attribute follows:
    my.kind = xmlElementOpen
    my.state = stateAttr
    my.c = my.a # save for later
  else:
    my.kind = xmlElementStart
-    if my.buf[my.bufpos] == '/' and my.buf[my.bufpos+1] == '>':
-      inc(my.bufpos, 2)
+    let slash = my.buf[my.bufpos] == '/'
+    if slash:
+      my.bufpos = lexbase.handleRefillChar(my, my.bufpos)
+    if slash and my.buf[my.bufpos] == '>':
+      inc(my.bufpos)
      my.state = stateEmptyElementTag
      my.c = nil
    elif my.buf[my.bufpos] == '>':
-      inc(my.bufpos)  
+      inc(my.bufpos)
    else:
      markError(my, errGtExpected)
-  
-proc parseEndTag(my: var XmlParser) = 
-  inc(my.bufpos, 2)
+
+proc parseEndTag(my: var XmlParser) =
+  my.bufpos = lexbase.handleRefillChar(my, my.bufpos+1)
+  # step over "</": '/' is a refill char, so it must go through handleRefillChar
  parseName(my, my.a)
  parseWhitespace(my, skip=true)
  if my.buf[my.bufpos] == '>':
@@ -469,13 +489,13 @@ proc parseEndTag(my: var XmlParser) =
    markError(my, errGtExpected)
  my.kind = xmlElementEnd

-proc parseAttribute(my: var XmlParser) = 
+proc parseAttribute(my: var XmlParser) =
  my.kind = xmlAttribute
  setLen(my.a, 0)
  setLen(my.b, 0)
  parseName(my, my.a)
  # if we have no name, we have '<tag attr= key %&$$%':
-  if my.a.len == 0: 
+  if my.a.len == 0:
    markError(my, errGtExpected)
    return
  parseWhitespace(my, skip=true)
@@ -491,27 +511,27 @@ proc parseAttribute(my: var XmlParser) =
    var quote = buf[pos]
    var pendingSpace = false
    inc(pos)
-    while true: 
+    while true:
      case buf[pos]
      of '\0':
        markError(my, errQuoteExpected)
        break
-      of '&': 
-        if pendingSpace: 
+      of '&':
+        if pendingSpace:
          add(my.b, ' ')
          pendingSpace = false
        my.bufpos = pos
        parseEntity(my, my.b)
        my.kind = xmlAttribute # parseEntity overwrites my.kind!
        pos = my.bufpos
-      of ' ', '\t': 
+      of ' ', '\t':
        pendingSpace = true
        inc(pos)
-      of '\c':  
+      of '\c':
        pos = lexbase.handleCR(my, pos)
        buf = my.buf
        pendingSpace = true
-      of '\L': 
+      of '\L':
        pos = lexbase.handleLF(my, pos)
        buf = my.buf
        pendingSpace = true
@@ -520,44 +540,48 @@ proc parseAttribute(my: var XmlParser) =
          inc(pos)
          break
        else:
-          if pendingSpace: 
+          if pendingSpace:
            add(my.b, ' ')
            pendingSpace = false
          add(my.b, buf[pos])
          inc(pos)
  else:
-    markError(my, errQuoteExpected)  
+    markError(my, errQuoteExpected)
  my.bufpos = pos
  parseWhitespace(my, skip=true)
-  
-proc parseCharData(my: var XmlParser) = 
+
+proc parseCharData(my: var XmlParser) =
  var pos = my.bufpos
  var buf = my.buf
-  while true: 
+  while true:
    case buf[pos]
    of '\0', '<', '&': break
-    of '\c':  
+    of '\c':
      # the specification says that CR-LF, CR are to be transformed to LF
      pos = lexbase.handleCR(my, pos)
      buf = my.buf
      add(my.a, '\L')
-    of '\L': 
+    of '\L':
      pos = lexbase.handleLF(my, pos)
      buf = my.buf
      add(my.a, '\L')
+    of '/':
+      pos = lexbase.handleRefillChar(my, pos)
+      buf = my.buf
+      add(my.a, '/')
    else:
      add(my.a, buf[pos])
      inc(pos)
  my.bufpos = pos
  my.kind = xmlCharData

-proc rawGetTok(my: var XmlParser) = 
+proc rawGetTok(my: var XmlParser) =
  my.kind = xmlError
  setLen(my.a, 0)
  var pos = my.bufpos
  var buf = my.buf
  case buf[pos]
-  of '<': 
+  of '<':
    case buf[pos+1]
    of '/':
      parseEndTag(my)
@@ -566,44 +590,44 @@ proc rawGetTok(my: var XmlParser) =
          buf[pos+5] == 'A' and buf[pos+6] == 'T' and buf[pos+7] == 'A' and
          buf[pos+8] == '[':
        parseCDATA(my)
-      elif buf[pos+2] == '-' and buf[pos+3] == '-': 
+      elif buf[pos+2] == '-' and buf[pos+3] == '-':
        parseComment(my)
-      else: 
+      else:
        parseSpecial(my)
    of '?':
      parsePI(my)
-    else: 
+    else:
      parseTag(my)
-  of ' ', '\t', '\c', '\l': 
+  of ' ', '\t', '\c', '\l':
    parseWhitespace(my)
    my.kind = xmlWhitespace
-  of '\0': 
+  of '\0':
    my.kind = xmlEof
  of '&':
    parseEntity(my, my.a)
-  else: 
+  else:
    parseCharData(my)
  assert my.kind != xmlError
-    
-proc getTok(my: var XmlParser) = 
+
+proc getTok(my: var XmlParser) =
  while true:
    rawGetTok(my)
    case my.kind
-    of xmlComment: 
+    of xmlComment:
      if my.options.contains(reportComments): break
-    of xmlWhitespace: 
+    of xmlWhitespace:
      if my.options.contains(reportWhitespace): break
    else: break
-    
-proc next*(my: var XmlParser) = 
+
+proc next*(my: var XmlParser) =
  ## retrieves the first/next event. This controls the parser.
  case my.state
  of stateNormal:
-    getTok(my)  
+    getTok(my)
  of stateStart:
    my.state = stateNormal
    getTok(my)
-    if my.kind == xmlPI and my.a == "xml": 
+    if my.kind == xmlPI and my.a == "xml":
      # just skip the first ``<?xml >`` processing instruction
      getTok(my)
  of stateAttr:
@@ -612,10 +636,14 @@ proc next*(my: var XmlParser) =
      my.kind = xmlElementClose
      inc(my.bufpos)
      my.state = stateNormal
-    elif my.buf[my.bufpos] == '/' and my.buf[my.bufpos+1] == '>': 
-      my.kind = xmlElementClose
-      inc(my.bufpos, 2)
-      my.state = stateEmptyElementTag
+    elif my.buf[my.bufpos] == '/':
+      my.bufpos = lexbase.handleRefillChar(my, my.bufpos)
+      if my.buf[my.bufpos] == '>':
+        my.kind = xmlElementClose
+        inc(my.bufpos)
+        my.state = stateEmptyElementTag
+      else:
+        markError(my, errGtExpected)
    else:
      parseAttribute(my)
      # state remains the same
@@ -624,10 +652,10 @@ proc next*(my: var XmlParser) =
    my.kind = xmlElementEnd
    if not my.c.isNil:
      my.a = my.c
-  of stateError: 
+  of stateError:
    my.kind = xmlError
    my.state = stateNormal
-  
+
 when not defined(testing) and isMainModule:
  import os
  var s = newFileStream(paramStr(1), fmRead)
@@ -645,13 +673,13 @@ when not defined(testing) and isMainModule:
    of xmlPI: echo("<? $1 ## $2 ?>" % [x.piName, x.piRest])
    of xmlElementStart: echo("<$1>" % x.elementName)
    of xmlElementEnd: echo("</$1>" % x.elementName)
-    
-    of xmlElementOpen: echo("<$1" % x.elementName) 
-    of xmlAttribute:   
+
+    of xmlElementOpen: echo("<$1" % x.elementName)
+    of xmlAttribute:
      echo("Key: " & x.attrKey)
      echo("Value: " & x.attrValue)
-    
-    of xmlElementClose: echo(">") 
+
+    of xmlElementClose: echo(">")
    of xmlCData:
      echo("<![CDATA[$1]]>" % x.charData)
    of xmlEntity: