diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin
index 280be9377..f5208ad6f 100644
--- a/core/encoding/entity/entity.odin
+++ b/core/encoding/entity/entity.odin
@@ -89,7 +89,7 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
t := Tokenizer{src=input}
in_data := false
- prev: rune
+ prev: rune = ' '
loop: for {
advance(&t) or_return
@@ -153,18 +153,32 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
write_string(&builder, ";")
}
} else {
- // https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-line-ends
- switch t.r {
- case '\n', 0x85, 0x2028:
- write_rune(&builder, '\n')
- case '\r': // Do nothing until next character
- case:
- if prev == '\r' { // Turn a single carriage return into a \n
- write_rune(&builder, '\n')
+ // Handle AV Normalization: https://www.w3.org/TR/2006/REC-xml11-20060816/#AVNormalize
+ if .Normalize_Whitespace in options {
+ switch t.r {
+ case ' ', '\r', '\n', '\t':
+ if prev != ' ' {
+ write_rune(&builder, ' ')
+ prev = ' '
+ }
+ case:
+ write_rune(&builder, t.r)
+ prev = t.r
}
- write_rune(&builder, t.r)
+ } else {
+ // https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-line-ends
+ switch t.r {
+ case '\n', 0x85, 0x2028:
+ write_rune(&builder, '\n')
+ case '\r': // Do nothing until next character
+ case:
+ if prev == '\r' { // Turn a single carriage return into a \n
+ write_rune(&builder, '\n')
+ }
+ write_rune(&builder, t.r)
+ }
+ prev = t.r
}
- prev = t.r
}
}
}
diff --git a/tests/core/assets/XML/attribute-whitespace.xml b/tests/core/assets/XML/attribute-whitespace.xml
new file mode 100644
index 000000000..6381225d5
--- /dev/null
+++ b/tests/core/assets/XML/attribute-whitespace.xml
@@ -0,0 +1,8 @@
+
+
+Barzle
+<부끄러운:barzle>
+ Indeed!
+부끄러운:barzle>
\ No newline at end of file
diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin
index 09d1a4611..b29431e10 100644
--- a/tests/core/encoding/xml/test_core_xml.odin
+++ b/tests/core/encoding/xml/test_core_xml.odin
@@ -146,6 +146,20 @@ xml_test_entities_unbox_decode :: proc(t: ^testing.T) {
})
}
+@(test)
+xml_test_attribute_whitespace :: proc(t: ^testing.T) {
+ run_test(t, {
+ // Parses the attribute-whitespace asset with no parser flags set.
+ // NOTE(review): presumably checks attribute-value whitespace normalization - confirm.
+ filename = "XML/attribute-whitespace.xml",
+ options = {
+ flags = {},
+ expected_doctype = "foozle",
+ },
+ crc32 = 0x8f5fd6c1,
+ })
+}
+
@(test)
xml_test_invalid_doctype :: proc(t: ^testing.T) {
run_test(t, {