diff --git a/core/encoding/entity/entity.odin b/core/encoding/entity/entity.odin index 280be9377..f5208ad6f 100644 --- a/core/encoding/entity/entity.odin +++ b/core/encoding/entity/entity.odin @@ -89,7 +89,7 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator := t := Tokenizer{src=input} in_data := false - prev: rune + prev: rune = ' ' loop: for { advance(&t) or_return @@ -153,18 +153,32 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator := write_string(&builder, ";") } } else { - // https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-line-ends - switch t.r { - case '\n', 0x85, 0x2028: - write_rune(&builder, '\n') - case '\r': // Do nothing until next character - case: - if prev == '\r' { // Turn a single carriage return into a \n - write_rune(&builder, '\n') + // Handle AV Normalization: https://www.w3.org/TR/2006/REC-xml11-20060816/#AVNormalize + if .Normalize_Whitespace in options { + switch t.r { + case ' ', '\r', '\n', '\t': + if prev != ' ' { + write_rune(&builder, ' ') + prev = ' ' + } + case: + write_rune(&builder, t.r) + prev = t.r } - write_rune(&builder, t.r) + } else { + // https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-line-ends + switch t.r { + case '\n', 0x85, 0x2028: + write_rune(&builder, '\n') + case '\r': // Do nothing until next character + case: + if prev == '\r' { // Turn a single carriage return into a \n + write_rune(&builder, '\n') + } + write_rune(&builder, t.r) + } + prev = t.r } - prev = t.r } } } diff --git a/tests/core/assets/XML/attribute-whitespace.xml b/tests/core/assets/XML/attribute-whitespace.xml new file mode 100644 index 000000000..6381225d5 --- /dev/null +++ b/tests/core/assets/XML/attribute-whitespace.xml @@ -0,0 +1,8 @@ + + +Barzle +<부끄러운:barzle> + Indeed! + \ No newline at end of file diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin index 09d1a4611..b29431e10 100644 --- a/tests/core/encoding/xml/test_core_xml.odin +++ b/tests/core/encoding/xml/test_core_xml.odin @@ -146,6 +146,20 @@ xml_test_entities_unbox_decode :: proc(t: ^testing.T) { }) } +@(test) +xml_test_attribute_whitespace :: proc(t: ^testing.T) { + run_test(t, { + // Same as above. + // Unbox CDATA in data tag. + filename = "XML/attribute-whitespace.xml", + options = { + flags = {}, + expected_doctype = "foozle", + }, + crc32 = 0x8f5fd6c1, + }) +} + @(test) xml_test_invalid_doctype :: proc(t: ^testing.T) { run_test(t, {