Fold XML attribute whitespace.

This commit is contained in:
Jeroen van Rijn
2024-06-12 13:30:00 +02:00
parent ebadff555d
commit 2fe961cbcd
3 changed files with 47 additions and 11 deletions

View File

@@ -89,7 +89,7 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
t := Tokenizer{src=input}
in_data := false
prev: rune
prev: rune = ' '
loop: for {
advance(&t) or_return
@@ -153,18 +153,32 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
write_string(&builder, ";")
}
} else {
// https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-line-ends
switch t.r {
case '\n', 0x85, 0x2028:
write_rune(&builder, '\n')
case '\r': // Do nothing until next character
case:
if prev == '\r' { // Turn a single carriage return into a \n
write_rune(&builder, '\n')
// Handle AV Normalization: https://www.w3.org/TR/2006/REC-xml11-20060816/#AVNormalize
if .Normalize_Whitespace in options {
switch t.r {
case ' ', '\r', '\n', '\t':
if prev != ' ' {
write_rune(&builder, ' ')
prev = ' '
}
case:
write_rune(&builder, t.r)
prev = t.r
}
write_rune(&builder, t.r)
} else {
// https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-line-ends
switch t.r {
case '\n', 0x85, 0x2028:
write_rune(&builder, '\n')
case '\r': // Do nothing until next character
case:
if prev == '\r' { // Turn a single carriage return into a \n
write_rune(&builder, '\n')
}
write_rune(&builder, t.r)
}
prev = t.r
}
prev = t.r
}
}
}

View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE foozle>
<foozle>Barzle</foozle>
<부끄러운:barzle>
<name foo:bar=" birmese
kittens have
fur ">Indeed!</name>
</부끄러운:barzle>

View File

@@ -146,6 +146,20 @@ xml_test_entities_unbox_decode :: proc(t: ^testing.T) {
})
}
// Runs the shared `run_test` helper against the `XML/attribute-whitespace.xml` fixture
// and compares the outcome with a fixed CRC32 value.
// NOTE(review): going by the fixture name, this is meant to cover whitespace handling
// inside attribute values - confirm against `run_test` and the parser.
@(test)
xml_test_attribute_whitespace :: proc(t: ^testing.T) {
run_test(t, {
// Fixture path; presumably resolved by `run_test` relative to the test assets
// directory - TODO confirm.
filename = "XML/attribute-whitespace.xml",
options = {
// No parser flags are set for this test.
flags = {},
// Doctype name the document is expected to declare.
expected_doctype = "foozle",
},
// Expected checksum. What exactly is hashed is decided inside `run_test`, which is
// not visible here; the value must be regenerated if the fixture or output changes.
crc32 = 0x8f5fd6c1,
})
}
@(test)
xml_test_invalid_doctype :: proc(t: ^testing.T) {
run_test(t, {