[xml] Add option.

This commit is contained in:
Jeroen van Rijn
2021-12-05 21:06:33 +01:00
parent d7200f6144
commit 7ec88d2430
4 changed files with 59 additions and 34 deletions

View File

@@ -64,8 +64,8 @@ main :: proc() {
mem.tracking_allocator_init(&track, context.allocator)
context.allocator = mem.tracking_allocator(&track)
_main()
//_entities()
// _main()
_entities()
if len(track.allocation_map) > 0 {
println()

View File

@@ -2,35 +2,40 @@ package xml_example
import "core:encoding/xml"
import "core:os"
import "core:path"
import "core:mem"
import "core:fmt"
/*
Silent error handler for the parser.

It is handed to `xml.parse` as the error handler argument. The body is empty,
so the reported position, format string and arguments are all discarded.

NOTE(review): the `fmt` parameter has the same name as the imported `core:fmt` package.
*/
Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {}
OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, }, expected_doctype = "unicode", }
import "core:time"
import "core:strings"
import "core:hash"
example :: proc() {
using fmt
filename := path.join(ODIN_ROOT, "tests", "core", "assets", "XML", "unicode.xml")
defer delete(filename)
doc: ^xml.Document
err: xml.Error
doc, err := xml.parse(filename, OPTIONS, Error_Handler)
DOC :: #load("../../../../tests/core/assets/XML/unicode.xml")
parse_duration: time.Duration
{
time.SCOPED_TICK_DURATION(&parse_duration)
doc, err = xml.parse(DOC, xml.Options{flags={.Ignore_Unsupported}})
}
defer xml.destroy(doc)
ms := time.duration_milliseconds(parse_duration)
speed := (f64(1000.0) / ms) * f64(len(DOC)) / 1_024.0 / 1_024.0
fmt.printf("Parse time: %v bytes in %.2f ms (%.2f MiB/s).\n", len(DOC), ms, speed)
if err != .None {
printf("Load/Parse error: %v\n", err)
if err == .File_Error {
printf("\"%v\" not found. Did you run \"tests\\download_assets.py\"?", filename)
println("\"unicode.xml\" not found. Did you run \"tests\\download_assets.py\"?")
}
os.exit(1)
}
printf("\"%v\" loaded and parsed.\n", filename)
println("\"unicode.xml\" loaded and parsed.")
charlist, charlist_ok := xml.find_child_by_ident(doc.root, "charlist")
if !charlist_ok {
@@ -40,17 +45,19 @@ example :: proc() {
printf("Found `<charlist>` with %v children.\n", len(charlist.children))
for char in charlist.children {
if char.ident != "character" {
eprintf("Expected `<character>`, got `<%v>`\n", char.ident)
os.exit(1)
}
crc32 := doc_hash(doc)
printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0xcaa042b9 else "🤬", crc32)
}
if _, ok := xml.find_attribute_val_by_key(char, "dec"); !ok {
eprintln("`<character dec=\"...\">` attribute not found.")
os.exit(1)
}
}
/*
	Computes a CRC32 over the textual form of `doc`.

	The document is written with `xml.print` into an in-memory string builder.
	When `print` is true, that text is also echoed with `fmt.println`.

	Inputs:
	- doc:   the document to print and hash.
	- print: echo the printed document before hashing (defaults to false).

	Returns:
	- crc32: `hash.crc32` of the printed document's bytes.
*/
doc_hash :: proc(doc: ^xml.Document, print := false) -> (crc32: u32) {
	builder: strings.Builder
	// Runs on return, after the checksum below has been computed from the builder's contents.
	defer strings.destroy_builder(&builder)

	xml.print(strings.to_writer(&builder), doc)

	serialized := strings.to_string(builder)
	if print {
		fmt.println(serialized)
	}

	crc32 = hash.crc32(transmute([]u8)serialized)
	return
}
main :: proc() {

View File

@@ -71,6 +71,12 @@ Option_Flag :: enum {
This option decodes them when encountered.
*/
Decode_SGML_Entities,
/*
If a tag body has a comment, it will be stripped unless this option is given.
*/
Keep_Tag_Body_Comments,
}
Option_Flags :: bit_set[Option_Flag; u8]
@@ -413,15 +419,29 @@ parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", err
/*
This should be a tag's body text.
*/
body_text := scan_string(t, t.offset) or_return
body_text := scan_string(t, t.offset) or_return
needs_processing := .Unbox_CDATA in opts.flags
needs_processing |= .Decode_SGML_Entities in opts.flags
decode_opts := entity.XML_Decode_Options{ .Comment_Strip }
if !needs_processing {
element.value = strings.intern_get(&doc.intern, body_text)
continue
}
decode_opts := entity.XML_Decode_Options{}
if .Keep_Tag_Body_Comments not_in opts.flags {
decode_opts += { .Comment_Strip }
}
if .Decode_SGML_Entities not_in opts.flags {
decode_opts += { .No_Entity_Decode }
}
if .Unbox_CDATA in opts.flags {
decode_opts += { .Unbox_CDATA, .Decode_CDATA }
decode_opts += { .Unbox_CDATA }
if .Decode_SGML_Entities in opts.flags {
decode_opts += { .Decode_CDATA }
}
}
decoded, decode_err := entity.decode_xml(body_text, decode_opts)

View File

@@ -8,9 +8,7 @@ import "core:io"
import "core:fmt"
import "core:hash"
Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) {
// Custom (silent) error handler.
}
// Custom (silent) error handler: the empty body ignores the position, format string and arguments.
Silent :: proc(pos: xml.Pos, format: string, args: ..any) {}
OPTIONS :: xml.Options{ flags = { .Ignore_Unsupported, .Intern_Comments, },
expected_doctype = "",
@@ -75,7 +73,7 @@ TESTS :: []TEST{
},
expected_doctype = "恥ずべきフクロウ",
},
crc32 = 0x6d38ac58,
crc32 = 0xad31d8e8,
},
{
@@ -131,7 +129,7 @@ TESTS :: []TEST{
},
expected_doctype = "html",
},
crc32 = 0xdb4a1e79,
crc32 = 0x573c1033,
},
{
@@ -306,7 +304,7 @@ run_tests :: proc(t: ^testing.T) {
expect(t, err == test.err, err_msg)
failed |= crc32 != test.crc32
err_msg = tprintf("Expected CRC 0x%08x, got 0x%08x", test.crc32, crc32)
err_msg = tprintf("Expected CRC 0x%08x, got 0x%08x, with options %v", test.crc32, crc32, test.options)
expect(t, crc32 == test.crc32, err_msg)
if failed {