diff --git a/core/encoding/entity/example/entity_example.odin b/core/encoding/entity/example/entity_example.odin index 882203f48..6fc377f9d 100644 --- a/core/encoding/entity/example/entity_example.odin +++ b/core/encoding/entity/example/entity_example.odin @@ -64,8 +64,8 @@ main :: proc() { mem.tracking_allocator_init(&track, context.allocator) context.allocator = mem.tracking_allocator(&track) - _main() - //_entities() + // _main() + _entities() if len(track.allocation_map) > 0 { println() diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin index 085252e92..daa3c5dab 100644 --- a/core/encoding/xml/example/xml_example.odin +++ b/core/encoding/xml/example/xml_example.odin @@ -2,35 +2,40 @@ package xml_example import "core:encoding/xml" import "core:os" -import "core:path" import "core:mem" import "core:fmt" - -/* - Silent error handler for the parser. -*/ -Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {} - -OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", } +import "core:time" +import "core:strings" +import "core:hash" example :: proc() { using fmt - filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml") - defer delete(filename) + doc: ^xml.Document + err: xml.Error - doc, err := xml.parse(filename, OPTIONS, Error_Handler) + DOC :: #load("../../../../tests/core/assets/XML/unicode.xml") + + parse_duration: time.Duration + { + time.SCOPED_TICK_DURATION(&parse_duration) + doc, err = xml.parse(DOC, xml.Options{flags={.Ignore_Unsupported}}) + } defer xml.destroy(doc) + ms := time.duration_milliseconds(parse_duration) + speed := (f64(1000.0) / ms) * f64(len(DOC)) / 1_024.0 / 1_024.0 + fmt.printf("Parse time: %v bytes in %.2f ms (%.2f MiB/s).\n", len(DOC), ms, speed) + if err != .None { printf("Load/Parse error: %v\n", err) if err == .File_Error { - printf("\"%v\" not found. Did you run \"tests\\download_assets.py\"?", filename) + println("\"unicode.xml\" not found. Did you run \"tests\\download_assets.py\"?") } os.exit(1) } - printf("\"%v\" loaded and parsed.\n", filename) + println("\"unicode.xml\" loaded and parsed.") charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist") if !charlist_ok { @@ -40,17 +45,19 @@ example :: proc() { printf("Found `` with %v children.\n", len(charlist.children)) - for char in charlist.children { - if char.ident != "character" { - eprintf("Expected ``, got `<%v>`\n", char.ident) - os.exit(1) - } + crc32 := doc_hash(doc) + printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0xcaa042b9 else "🤬", crc32) +} - if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok { - eprintln("`` attribute not found.") - os.exit(1) - } - } +doc_hash :: proc(doc: ^xml.Document, print := false) -> (crc32: u32) { + buf: strings.Builder + defer strings.destroy_builder(&buf) + w := strings.to_writer(&buf) + + xml.print(w, doc) + tree := strings.to_string(buf) + if print { fmt.println(tree) } + return hash.crc32(transmute([]u8)tree) } main :: proc() { diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin index 6f49b8e08..b169bd57a 100644 --- a/core/encoding/xml/xml_reader.odin +++ b/core/encoding/xml/xml_reader.odin @@ -71,6 +71,12 @@ Option_Flag :: enum { This option decodes them when encountered. */ Decode_SGML_Entities, + + /* + If a tag body has a comment, it will be stripped unless this option is given. + */ + Keep_Tag_Body_Comments, + } Option_Flags :: bit_set[Option_Flag; u8] @@ -413,15 +419,29 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err /* This should be a tag's body text. */ - body_text := scan_string(t, t.offset) or_return + body_text := scan_string(t, t.offset) or_return + needs_processing := .Unbox_CDATA in opts.flags + needs_processing |= .Decode_SGML_Entities in opts.flags - decode_opts := entity.XML_Decode_Options{ .Comment_Strip } + if !needs_processing { + element.value = strings.intern_get(&doc.intern, body_text) + continue + } + + decode_opts := entity.XML_Decode_Options{} + if .Keep_Tag_Body_Comments not_in opts.flags { + decode_opts += { .Comment_Strip } + } if .Decode_SGML_Entities not_in opts.flags { decode_opts += { .No_Entity_Decode } } + if .Unbox_CDATA in opts.flags { - decode_opts += { .Unbox_CDATA, .Decode_CDATA } + decode_opts += { .Unbox_CDATA } + if .Decode_SGML_Entities in opts.flags { + decode_opts += { .Decode_CDATA } + } } decoded, decode_err := entity.decode_xml(body_text, decode_opts) diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin index f9f7a2992..7669afe97 100644 --- a/tests/core/encoding/xml/test_core_xml.odin +++ b/tests/core/encoding/xml/test_core_xml.odin @@ -8,9 +8,7 @@ import "core:io" import "core:fmt" import "core:hash" -Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) { - // Custom (silent) error handler. -} +Silent :: proc(pos: xml.Pos, format: string, args: ..any) {} OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, }, expected_doctype = "", @@ -75,7 +73,7 @@ TESTS :: []TEST{ }, expected_doctype = "恥ずべきフクロウ", }, - crc32 = 0x6d38ac58, + crc32 = 0xad31d8e8, }, { @@ -131,7 +129,7 @@ TESTS :: []TEST{ }, expected_doctype = "html", }, - crc32 = 0xdb4a1e79, + crc32 = 0x573c1033, }, { @@ -306,7 +304,7 @@ run_tests :: proc(t: ^testing.T) { expect(t, err == test.err, err_msg) failed |= crc32 != test.crc32 - err_msg = tprintf("Expected CRC 0x%08x, got 0x%08x", test.crc32, crc32) + err_msg = tprintf("Expected CRC 0x%08x, got 0x%08x, with options %v", test.crc32, crc32, test.options) expect(t, crc32 == test.crc32, err_msg) if failed {