diff --git a/core/encoding/xml/debug_print.odin b/core/encoding/xml/debug_print.odin new file mode 100644 index 000000000..0b7ffa822 --- /dev/null +++ b/core/encoding/xml/debug_print.odin @@ -0,0 +1,73 @@ +package xml +/* + An XML 1.0 / 1.1 parser + + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-3 license. + + A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816). + + List of contributors: + Jeroen van Rijn: Initial implementation. +*/ +import "core:fmt" + +/* + Just for debug purposes. +*/ +print :: proc(doc: ^Document) { + assert(doc != nil) + + using fmt + println("[XML Prolog]") + + for attr in doc.prolog { + printf("\t%v: %v\n", attr.key, attr.val) + } + + printf("[Encoding] %v\n", doc.encoding) + printf("[DOCTYPE] %v\n", doc.doctype.ident) + + if len(doc.doctype.rest) > 0 { + printf("\t%v\n", doc.doctype.rest) + } + + if doc.root != nil { + println(" --- ") + print_element(0, doc.root) + println(" --- ") + } +} + +print_element :: proc(indent: int, element: ^Element) { + if element == nil { return } + using fmt + + tab :: proc(indent: int) { + tabs := "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" + + i := max(0, min(indent, len(tabs))) + printf("%v", tabs[:i]) + } + + tab(indent) + + if element.kind == .Element { + printf("<%v>\n", element.ident) + if len(element.value) > 0 { + tab(indent + 1) + printf("[Value] %v\n", element.value) + } + + for attr in element.attribs { + tab(indent + 1) + printf("[Attr] %v: %v\n", attr.key, attr.val) + } + + for child in element.children { + print_element(indent + 1, child) + } + } else if element.kind == .Comment { + printf("[COMMENT] %v\n", element.value) + } +} \ No newline at end of file diff --git a/core/encoding/xml/example/xml_example.odin b/core/encoding/xml/example/xml_example.odin new file mode 100644 index 000000000..24a277de6 --- /dev/null +++ b/core/encoding/xml/example/xml_example.odin @@ -0,0 +1,55 @@ +package xml_example + +import "core:encoding/xml" +import "core:mem" +import "core:fmt" + +Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) { + +} + +FILENAME :: "../../../../tests/core/assets/xml/nl_NL-xliff-1.0.xliff" +DOC :: #load(FILENAME) + +OPTIONS :: xml.Options{ + flags = { + .Ignore_Unsupported, .Intern_Comments, + }, + expected_doctype = "", +} + +_main :: proc() { + using fmt + + println("--- DOCUMENT TO PARSE ---") + println(string(DOC)) + println("--- /DOCUMENT TO PARSE ---\n") + + doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler) + defer xml.destroy(doc) + + xml.print(doc) + + if err != .None { + printf("Parse error: %v\n", err) + } else { + println("DONE!") + } +} + +main :: proc() { + using fmt + + track: mem.Tracking_Allocator + mem.tracking_allocator_init(&track, context.allocator) + context.allocator = mem.tracking_allocator(&track) + + _main() + + if len(track.allocation_map) > 0 { + println() + for _, v in track.allocation_map { + printf("%v Leaked %v bytes.\n", v.location, v.size) + } + } +} \ No newline at end of file diff --git a/core/encoding/xml/tokenizer.odin b/core/encoding/xml/tokenizer.odin new file mode 100644 index 000000000..a63dca5bd --- /dev/null +++ b/core/encoding/xml/tokenizer.odin @@ -0,0 +1,339 @@ +package xml + +import "core:fmt" +import "core:unicode" +import "core:unicode/utf8" + +Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any) + +Token :: struct { + kind: Token_Kind, + text: string, + pos: Pos, +} + +Pos :: struct { + file: string, + offset: int, // starting at 0 + line: int, // starting at 1 + column: int, // starting at 1 +} + +Token_Kind :: enum { + Invalid, + + Ident, + Literal, + Rune, + String, + + Double_Quote, // " + Single_Quote, // ' + Colon, // : + + Eq, // = + Lt, // < + Gt, // > + Exclaim, // ! + Question, // ? + Hash, // # + Slash, // / + Dash, // - + + Open_Bracket, // [ + Close_Bracket, // ] + + EOF, +} + +CDATA_START :: "" + +Tokenizer :: struct { + // Immutable data + path: string, + src: string, + err: Error_Handler, + + // Tokenizing state + ch: rune, + offset: int, + read_offset: int, + line_offset: int, + line_count: int, + + // Mutable data + error_count: int, +} + +init :: proc(t: ^Tokenizer, src: string, path: string, err: Error_Handler = default_error_handler) { + t.src = src + t.err = err + t.ch = ' ' + t.offset = 0 + t.read_offset = 0 + t.line_offset = 0 + t.line_count = len(src) > 0 ? 1 : 0 + t.error_count = 0 + t.path = path + + advance_rune(t) + if t.ch == utf8.RUNE_BOM { + advance_rune(t) + } +} + +@(private) +offset_to_pos :: proc(t: ^Tokenizer, offset: int) -> Pos { + line := t.line_count + column := offset - t.line_offset + 1 + + return Pos { + file = t.path, + offset = offset, + line = line, + column = column, + } +} + +default_error_handler :: proc(pos: Pos, msg: string, args: ..any) { + fmt.eprintf("%s(%d:%d) ", pos.file, pos.line, pos.column) + fmt.eprintf(msg, ..args) + fmt.eprintf("\n") +} + +error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) { + pos := offset_to_pos(t, offset) + if t.err != nil { + t.err(pos, msg, ..args) + } + t.error_count += 1 +} + +advance_rune :: proc(using t: ^Tokenizer) { + if read_offset < len(src) { + offset = read_offset + if ch == '\n' { + line_offset = offset + line_count += 1 + } + r, w := rune(src[read_offset]), 1 + switch { + case r == 0: + error(t, t.offset, "illegal character NUL") + case r >= utf8.RUNE_SELF: + r, w = utf8.decode_rune_in_string(src[read_offset:]) + if r == utf8.RUNE_ERROR && w == 1 { + error(t, t.offset, "illegal UTF-8 encoding") + } else if r == utf8.RUNE_BOM && offset > 0 { + error(t, t.offset, "illegal byte order mark") + } + } + read_offset += w + ch = r + } else { + offset = len(src) + if ch == '\n' { + line_offset = offset + line_count += 1 + } + ch = -1 + } +} + +peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte { + if t.read_offset+offset < len(t.src) { + return t.src[t.read_offset+offset] + } + return 0 +} + +skip_whitespace :: proc(t: ^Tokenizer) { + for { + switch t.ch { + case ' ', '\t', '\r', '\n': + advance_rune(t) + case: + return + } + } +} + +is_letter :: proc(r: rune) -> bool { + if r < utf8.RUNE_SELF { + switch r { + case '_': + return true + case 'A'..='Z', 'a'..='z': + return true + } + } + return unicode.is_letter(r) +} + +is_valid_identifier_rune :: proc(r: rune) -> bool { + if r < utf8.RUNE_SELF { + switch r { + case '_', '-', ':': return true + case 'A'..='Z', 'a'..='z': return true + case '0'..'9': return true + } + } + + if unicode.is_digit(r) || unicode.is_letter(r) { + return true + } + return false +} + +scan_identifier :: proc(t: ^Tokenizer) -> string { + offset := t.offset + namespaced := false + + for is_valid_identifier_rune(t.ch) { + advance_rune(t) + if t.ch == ':' { + /* + A namespaced attr can have at most two parts, `namespace:ident`. + */ + if namespaced { + break + } + namespaced = true + } + } + return string(t.src[offset : t.offset]) +} + +scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close := false) -> (value: string, err: Error) { + err = .None + in_cdata := false + + loop: for { + ch := t.ch + + switch ch { + case -1: + error(t, t.offset, "[scan_string] Premature end of file.\n") + return "", .Premature_EOF + + case '<': + /* + Might be the start of a CDATA tag. + */ + if t.read_offset + len(CDATA_START) < len(t.src) { + if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START { + in_cdata = true + } + } + + case ']': + /* + Might be the end of a CDATA tag. + */ + if t.read_offset + len(CDATA_END) < len(t.src) { + if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END { + in_cdata = false + } + } + + case '\n': + if !in_cdata { + error(t, offset, string(t.src[offset : t.offset])) + error(t, offset, "[scan_string] Not terminated\n") + err = .Invalid_Tag_Value + break loop + } + } + + if ch == close && !in_cdata { + /* + If it's not a CDATA tag, it's the end of this body. + */ + break loop + } + + advance_rune(t) + } + + lit := string(t.src[offset : t.offset]) + if consume_close { + advance_rune(t) + } + + /* + TODO: Handle decoding escape characters and unboxing CDATA. + */ + + return lit, err +} + +peek :: proc(t: ^Tokenizer) -> (token: Token) { + old := t^ + token = scan(t) + t^ = old + return token +} + +scan :: proc(t: ^Tokenizer) -> Token { + skip_whitespace(t) + + offset := t.offset + + kind: Token_Kind + err: Error + lit: string + pos := offset_to_pos(t, offset) + + switch ch := t.ch; true { + case is_letter(ch): + lit = scan_identifier(t) + kind = .Ident + + case: + advance_rune(t) + switch ch { + case -1: + kind = .EOF + + case '<': kind = .Lt + case '>': kind = .Gt + case '!': kind = .Exclaim + case '?': kind = .Question + case '=': kind = .Eq + case '#': kind = .Hash + case '/': kind = .Slash + case '-': kind = .Dash + case ':': kind = .Colon + + case '"', '\'': + lit, err = scan_string(t, t.offset, ch, true) + if err == .None { + kind = .String + } else { + kind = .Invalid + } + + case '\n': + lit = "\n" + + case '\\': + token := scan(t) + if token.pos.line == pos.line { + error(t, token.pos.offset, "expected a newline after \\") + } + return token + + case: + if ch != utf8.RUNE_BOM { + // error(t, t.offset, "illegal character '%r': %d", ch, ch) + } + kind = .Invalid + } + } + + if lit == "" { + lit = string(t.src[offset : t.offset]) + } + return Token{kind, lit, pos} +} \ No newline at end of file diff --git a/core/encoding/xml/xml_reader.odin b/core/encoding/xml/xml_reader.odin new file mode 100644 index 000000000..526be5856 --- /dev/null +++ b/core/encoding/xml/xml_reader.odin @@ -0,0 +1,651 @@ +package xml +/* + An XML 1.0 / 1.1 parser + + Copyright 2021 Jeroen van Rijn . + Made available under Odin's BSD-3 license. + + A from-scratch XML implementation, loosely modelled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816). + + Features: + - Supports enough of the XML 1.0/1.1 spec to handle the 99.9% of XML documents in common current usage. + - Simple to understand and use. Small. + + Caveats: + - We do NOT support HTML in this package, as that may or may not be valid XML. + If it works, great. If it doesn't, that's not considered a bug. + + - We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences. + - <[!ELEMENT and <[!ATTLIST are not supported, and will be either ignored or return an error depending on the parser options. + + TODO: + - Optional CDATA unboxing. + - Optional `>`, ` `, ` ` and other escape substitution in tag bodies. + + MAYBE: + - XML writer? + - Serialize/deserialize Odin types? + + List of contributors: + Jeroen van Rijn: Initial implementation. +*/ + +import "core:strings" +import "core:mem" +import "core:os" + +DEFAULT_Options :: Options{ + flags = { + .Ignore_Unsupported, + }, + expected_doctype = "", +} + +Option_Flag :: enum { + /* + Document MUST start with ` (doc: ^Document, err: Error) { + context.allocator = allocator + + opts := validate_options(options) or_return + + t := &Tokenizer{} + init(t, string(data), path, error_handler) + + doc = new(Document) + doc.allocator = allocator + doc.tokenizer = t + + strings.intern_init(&doc.intern, allocator, allocator) + + err = .Unexpected_Token + element, parent: ^Element + + /* + If a DOCTYPE is present, the root tag has to match. + If an expected DOCTYPE is given in options (i.e. it's non-empty), the DOCTYPE (if present) and root tag have to match. + */ + expected_doctype := options.expected_doctype + + loop: for { + tok := scan(t) + #partial switch tok.kind { + + case .Lt: + open := scan(t) + #partial switch open.kind { + + case .Question: + /* + 0 { + /* + We've already seen a prolog. + */ + return doc, .Too_Many_Prologs + } else { + error(t, t.offset, "Expected \" 0 { + return doc, .Too_Many_DocTypes + } + if doc.root != nil { + return doc, .DocType_Must_Proceed_Elements + } + parse_doctype(doc) or_return + + if len(expected_doctype) > 0 && expected_doctype != doc.doctype.ident { + error(t, t.offset, "Invalid DOCTYPE. Expected: %v, got: %v\n", expected_doctype, doc.doctype.ident) + return doc, .Invalid_DocType + } + expected_doctype = doc.doctype.ident + + case: + if .Error_on_Unsupported in opts.flags { + error(t, t.offset, "Unhandled: . + The grammar does not allow a comment to end in ---> + */ + if doc.root == nil { + return doc, .Comment_Before_Root_Element + } + + expect(t, .Dash) + offset := t.offset + + for { + advance_rune(t) + ch := t.ch + + /* + A comment ends when we see -->, preceded by a character that's not a dash. + "For compatibility, the string "--" (double-hyphen) must not occur within comments." + + See: https://www.w3.org/TR/2006/REC-xml11-20060816/#dt-comment + + Thanks to the length (4) of the comment start, we also have enough lookback, + and the peek at the next byte asserts that there's at least one more character + that's a `>`. + */ + if ch < 0 { + error(t, offset, "[parse] Comment was not terminated\n") + return doc, .Unclosed_Comment + } + + if string(t.src[t.offset - 1:][:2]) == "--" { + if peek_byte(t) == '>' { + break + } else { + error(t, t.offset - 1, "Invalid -- sequence in comment.\n") + return doc, .Invalid_Sequence_In_Comment + } + } + } + + if .Intern_Comments in opts.flags { + el := new(Element) + + el.parent = element + el.kind = .Comment + el.value = strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1])) + append(&element.children, el) + } + + expect(t, .Dash) + expect(t, .Gt) + + case: + error(t, t.offset, "Invalid Token after 0 && expected_doctype != open.text { + error(t, t.offset, "Root Tag doesn't match DOCTYPE. Expected: %v, got: %v\n", expected_doctype, open.text) + return doc, .Invalid_DocType + } + } + + /* + One of these should follow: + - `>`, which means we've just opened this tag and expect a later element to close it. + - `/>`, which means this is an 'empty' or self-closing tag. + */ + end_token := scan(t) + + #partial switch end_token.kind { + case .Gt: + /* + We're now the new parent. + */ + parent = element + + case .Slash: + /* + Empty tag? + */ + expect(t, .Gt) or_return + + case: + error(t, t.offset, "Expected close tag, got: %#v\n", end_token) + return + } + + case .Slash: + /* + Close tag. + */ + ident := expect(t, .Ident) or_return + _ = expect(t, .Gt) or_return + + if element.ident != ident.text { + error(t, t.offset, "Mismatched Closing Tag: %v\n", ident.text) + return doc, .Mismatched_Closing_Tag + } + parent = element.parent + element = parent + + case: + error(t, t.offset, "Invalid Token after <: %#v\n", open) + return + } + + case .EOF: + break loop + + case: + /* + This should be a tag's body text. + */ + element.value = scan_string(t, tok.pos.offset) or_return + } + } + + if .Must_Have_Prolog in opts.flags && len(doc.prolog) == 0 { + return doc, .No_Prolog + } + + if .Must_Have_DocType in opts.flags && len(doc.doctype.ident) == 0 { + return doc, .No_DocType + } + + return doc, .None +} + +parse_from_file :: proc(filename: string, options := DEFAULT_Options, error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) { + context.allocator = allocator + + data, data_ok := os.read_entire_file(filename) + defer delete(data) + + if !data_ok { return {}, .File_Error } + + return parse_from_slice(data, options, filename, error_handler, allocator) +} + +parse :: proc { parse_from_file, parse_from_slice } + +free_element :: proc(element: ^Element) { + if element == nil { return } + + for child in element.children { + /* + NOTE: Recursive. + + Could be rewritten so it adds them to a list of pointers to free. + */ + free_element(child) + } + delete(element.attribs) + delete(element.children) + free(element) +} + +destroy :: proc(doc: ^Document) { + if doc == nil { return } + + free_element(doc.root) + strings.intern_destroy(&doc.intern) + + delete(doc.prolog) + free(doc) +} + +/* + Helpers. +*/ + +validate_options :: proc(options: Options) -> (validated: Options, err: Error) { + validated = options + + if .Error_on_Unsupported in validated.flags && .Ignore_Unsupported in validated.flags { + return options, .Conflicting_Options + } + + if .Unbox_CDATA in validated.flags { + return options, .Unhandled_CDATA_Unboxing + } + + if .Decode_SGML_Entities in validated.flags { + return options, .Unhandled_SGML_Entity_Decoding + } + + return validated, .None +} + +expect :: proc(t: ^Tokenizer, kind: Token_Kind) -> (tok: Token, err: Error) { + tok = scan(t) + if tok.kind == kind { return tok, .None } + + error(t, t.offset, "Expected \"%v\", got \"%v\".", kind, tok.kind) + return tok, .Unexpected_Token +} + +parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error) { + assert(doc != nil) + context.allocator = doc.allocator + t := doc.tokenizer + + key := expect(t, .Ident) or_return + offset = t.offset - len(key.text) + + _ = expect(t, .Eq) or_return + value := expect(t, .String) or_return + + attr.key = strings.intern_get(&doc.intern, key.text) + attr.val = strings.intern_get(&doc.intern, value.text) + + err = .None + return +} + +check_duplicate_attributes :: proc(t: ^Tokenizer, attribs: Attributes, attr: Attr, offset: int) -> (err: Error) { + for a in attribs { + if attr.key == a.key { + error(t, offset, "Duplicate attribute: %v\n", attr.key) + return .Duplicate_Attribute + } + } + return .None +} + +parse_attributes :: proc(doc: ^Document, attribs: ^Attributes) -> (err: Error) { + assert(doc != nil) + context.allocator = doc.allocator + t := doc.tokenizer + + for peek(t).kind == .Ident { + attr, offset := parse_attribute(doc) or_return + check_duplicate_attributes(t, attribs^, attr, offset) or_return + append(attribs, attr) + } + skip_whitespace(t) + return .None +} + +parse_prolog :: proc(doc: ^Document) -> (err: Error) { + assert(doc != nil) + context.allocator = doc.allocator + t := doc.tokenizer + + offset := t.offset + parse_attributes(doc, &doc.prolog) or_return + + for attr in doc.prolog { + switch attr.key { + case "version": + switch attr.val { + case "1.0", "1.1": + case: + error(t, offset, "[parse_prolog] Warning: Unhandled XML version: %v\n", attr.val) + } + + case "encoding": + switch strings.to_lower(attr.val, context.temp_allocator) { + case "utf-8", "utf8": + doc.encoding = .UTF_8 + + case "latin-1", "latin1", "iso-8859-1": + doc.encoding = .LATIN_1 + + case: + /* + Unrecognized encoding, assume UTF-8. + */ + error(t, offset, "[parse_prolog] Warning: Unrecognized encoding: %v\n", attr.val) + } + + case: + // Ignored. + } + } + + _ = expect(t, .Question) or_return + _ = expect(t, .Gt) or_return + + return .None +} + +skip_element :: proc(t: ^Tokenizer) -> (err: Error) { + close := 1 + + loop: for { + tok := scan(t) + #partial switch tok.kind { + case .EOF: + error(t, t.offset, "[skip_element] Premature EOF\n") + return .Premature_EOF + + case .Lt: + close += 1 + + case .Gt: + close -= 1 + if close == 0 { + break loop + } + + case: + + } + } + return .None +} + +parse_doctype :: proc(doc: ^Document) -> (err: Error) { + /* + + + + ]> + */ + assert(doc != nil) + context.allocator = doc.allocator + t := doc.tokenizer + + tok := expect(t, .Ident) or_return + doc.doctype.ident = strings.intern_get(&doc.intern, tok.text) + + skip_whitespace(t) + offset := t.offset + skip_element(t) or_return + + /* + -1 because the current offset is that of the closing tag, so the rest of the DOCTYPE tag ends just before it. + */ + doc.doctype.rest = strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1])) + return .None +} \ No newline at end of file diff --git a/tests/core/Makefile b/tests/core/Makefile index 0f0ffe4d6..e17dede90 100644 --- a/tests/core/Makefile +++ b/tests/core/Makefile @@ -1,22 +1,29 @@ ODIN=../../odin PYTHON=$(shell which python3) -all: download_test_assets image_test compress_test strings_test hash_test crypto_test +all: download_test_assets image_test compress_test strings_test hash_test crypto_test encoding_test download_test_assets: $(PYTHON) download_assets.py image_test: - $(ODIN) run image/test_core_image.odin + $(ODIN) run image/test_core_image.odin -out=test_image -o:speed -no-bounds-check compress_test: - $(ODIN) run compress/test_core_compress.odin + $(ODIN) run compress/test_core_compress.odin -out=test_compress -o:speed -no-bounds-check strings_test: - $(ODIN) run strings/test_core_strings.odin + $(ODIN) run strings/test_core_strings.odin -out=test_strings -o:speed -no-bounds-check + +odin_test: + $(ODIN) run odin -out=test_odin -o:speed -no-bounds-check hash_test: $(ODIN) run hash -out=test_hash -o:speed -no-bounds-check crypto_test: - $(ODIN) run crypto -out=crypto_hash -o:speed -no-bounds-check \ No newline at end of file + $(ODIN) run crypto -out=crypto_hash -o:speed -no-bounds-check + +encoding_test: + $(ODIN) run encoding/json -out=test_encoding_json -o:speed -no-bounds-check + $(ODIN) run encoding/xml -out=test_encoding_xml -o:speed -no-bounds-check diff --git a/tests/core/assets/xml/nl_NL-qt-ts.ts b/tests/core/assets/xml/nl_NL-qt-ts.ts new file mode 100644 index 000000000..6ec3f2f47 --- /dev/null +++ b/tests/core/assets/xml/nl_NL-qt-ts.ts @@ -0,0 +1,35 @@ + + + + + Page + + Text for translation + commenting + Tekst om te vertalen + + + Also text to translate + some text + Ook tekst om te vertalen + + + + installscript + + 99 bottles of beer on the wall + some new comments here + 99 flessen bier op de muur + + + + apple_count + + %d apple(s) + + %d appel + %d appels + + + + diff --git a/tests/core/assets/xml/nl_NL-xliff-1.0.xliff b/tests/core/assets/xml/nl_NL-xliff-1.0.xliff new file mode 100644 index 000000000..7a1abcd66 --- /dev/null +++ b/tests/core/assets/xml/nl_NL-xliff-1.0.xliff @@ -0,0 +1,38 @@ + + + + + + text + tekst + Context + + + text 1 + tekst 1 + Context 1 + + + text 2 + + Context of the segment 2 + + + text 3 + translation 3 + Context 3 + + + Plurals + + %d month + %d maand + + + %d months + %d maanden + + + + + diff --git a/tests/core/assets/xml/nl_NL-xliff-2.0.xliff b/tests/core/assets/xml/nl_NL-xliff-2.0.xliff new file mode 100644 index 000000000..611ac80c4 --- /dev/null +++ b/tests/core/assets/xml/nl_NL-xliff-2.0.xliff @@ -0,0 +1,52 @@ + + + + + Note for file + + + + Note for unit + + + text + + + + + + Note for unit 2 + + + text 2 + translation 2 + + + + + Note for unit 3 + + + text 3 + approved translation 3 + + + + + + Plurals + + + %d month + %d maand + + + + + %d months + %d maanden + + + + + \ No newline at end of file diff --git a/tests/core/assets/xml/utf8.xml b/tests/core/assets/xml/utf8.xml new file mode 100644 index 000000000..c9ed3bf69 --- /dev/null +++ b/tests/core/assets/xml/utf8.xml @@ -0,0 +1,8 @@ + + +<恥ずべきフクロウ 올빼미_id="Foozle Hello, world!"]]>Barzle"> +<부끄러운:barzle> + ရှက်စရာ ဇီးကွက် + Owl of Shame + More CDATA Hello, world! Nonsense. + \ No newline at end of file diff --git a/tests/core/build.bat b/tests/core/build.bat index 176b7f175..7a214acc9 100644 --- a/tests/core/build.bat +++ b/tests/core/build.bat @@ -5,34 +5,35 @@ python3 download_assets.py echo --- echo Running core:image tests echo --- -%PATH_TO_ODIN% run image %COMMON% +%PATH_TO_ODIN% run image %COMMON% -out:test_image.exe echo --- echo Running core:compress tests echo --- -%PATH_TO_ODIN% run compress %COMMON% +%PATH_TO_ODIN% run compress %COMMON% -out:test_compress.exe echo --- echo Running core:strings tests echo --- -%PATH_TO_ODIN% run strings %COMMON% +%PATH_TO_ODIN% run strings %COMMON% -out:test_strings.exe echo --- echo Running core:hash tests echo --- -%PATH_TO_ODIN% run hash %COMMON% -o:size +%PATH_TO_ODIN% run hash %COMMON% -o:size -out:test_hash.exe echo --- echo Running core:odin tests echo --- -%PATH_TO_ODIN% run odin %COMMON% -o:size +%PATH_TO_ODIN% run odin %COMMON% -o:size -out:test_odin.exe echo --- echo Running core:crypto hash tests echo --- -%PATH_TO_ODIN% run crypto %COMMON% +%PATH_TO_ODIN% run crypto %COMMON% -o:speed -out:test_crypto.exe echo --- echo Running core:encoding tests echo --- -%PATH_TO_ODIN% run encoding %COMMON% \ No newline at end of file +%PATH_TO_ODIN% run encoding\json %COMMON% -out:test_json.exe +%PATH_TO_ODIN% run encoding\xml %COMMON% -out:test_xml.exe \ No newline at end of file diff --git a/tests/core/encoding/test_core_json.odin b/tests/core/encoding/json/test_core_json.odin similarity index 63% rename from tests/core/encoding/test_core_json.odin rename to tests/core/encoding/json/test_core_json.odin index f536eb4c6..4f415c008 100644 --- a/tests/core/encoding/test_core_json.odin +++ b/tests/core/encoding/json/test_core_json.odin @@ -8,32 +8,32 @@ TEST_count := 0 TEST_fail := 0 when ODIN_TEST { - expect :: testing.expect - log :: testing.log + expect :: testing.expect + log :: testing.log } else { - expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) { - fmt.printf("[%v] ", loc) - TEST_count += 1 - if !condition { - TEST_fail += 1 - fmt.println(message) - return - } - fmt.println(" PASS") - } - log :: proc(t: ^testing.T, v: any, loc := #caller_location) { - fmt.printf("[%v] ", loc) - fmt.printf("log: %v\n", v) - } + expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) { + fmt.printf("[%v] ", loc) + TEST_count += 1 + if !condition { + TEST_fail += 1 + fmt.println(message) + return + } + fmt.println(" PASS") + } + log :: proc(t: ^testing.T, v: any, loc := #caller_location) { + fmt.printf("[%v] ", loc) + fmt.printf("log: %v\n", v) + } } main :: proc() { - t := testing.T{} + t := testing.T{} parse_json(&t) marshal_json(&t) - fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) } @test diff --git a/tests/core/encoding/xml/test_core_xml.odin b/tests/core/encoding/xml/test_core_xml.odin new file mode 100644 index 000000000..7eefac212 --- /dev/null +++ b/tests/core/encoding/xml/test_core_xml.odin @@ -0,0 +1,264 @@ +package test_core_xml + +import "core:encoding/xml" +import "core:testing" +import "core:mem" +import "core:fmt" + +Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) { + // Custom (silent) error handler. +} + +OPTIONS :: xml.Options{ + flags = { + .Ignore_Unsupported, .Intern_Comments, + }, + expected_doctype = "", +} + +TEST_count := 0 +TEST_fail := 0 + +TEST :: struct { + filename: string, + options: xml.Options, + expected: struct { + error: xml.Error, + xml_version: string, + xml_encoding: string, + doctype: string, + }, +} + +TESTS :: []TEST{ + /* + First we test that certain files parse without error. + */ + { + filename = "assets/xml/utf8.xml", + options = OPTIONS, + expected = { + error = .None, + xml_version = "1.0", + xml_encoding = "utf-8", + doctype = "恥ずべきフクロウ", + }, + }, + { + filename = "assets/xml/nl_NL-qt-ts.ts", + options = OPTIONS, + expected = { + error = .None, + xml_version = "1.0", + xml_encoding = "utf-8", + doctype = "TS", + }, + }, + { + filename = "assets/xml/nl_NL-xliff-1.0.xliff", + options = OPTIONS, + expected = { + error = .None, + xml_version = "1.0", + xml_encoding = "UTF-8", + doctype = "", + }, + }, + { + filename = "assets/xml/nl_NL-xliff-2.0.xliff", + options = OPTIONS, + expected = { + error = .None, + xml_version = "1.0", + xml_encoding = "utf-8", + doctype = "", + }, + }, + + /* + Then we test that certain errors are returned as expected. + */ + { + filename = "assets/xml/utf8.xml", + options = { + flags = { + .Ignore_Unsupported, .Intern_Comments, + }, + expected_doctype = "Odin", + }, + expected = { + error = .Invalid_DocType, + xml_version = "1.0", + xml_encoding = "utf-8", + doctype = "恥ずべきフクロウ", + }, + }, +} + +when ODIN_TEST { + expect :: testing.expect + log :: testing.log +} else { + expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) { + fmt.printf("[%v] ", loc) + TEST_count += 1 + if !condition { + TEST_fail += 1 + fmt.println(message) + return + } + fmt.println(" PASS") + } + log :: proc(t: ^testing.T, v: any, loc := #caller_location) { + fmt.printf("[%v] ", loc) + fmt.printf("log: %v\n", v) + } +} + +main :: proc() { + t := testing.T{} + + track: mem.Tracking_Allocator + mem.tracking_allocator_init(&track, context.allocator) + context.allocator = mem.tracking_allocator(&track) + + run_tests(&t) + + if len(track.allocation_map) > 0 { + for _, v in track.allocation_map { + err_msg := fmt.tprintf("%v Leaked %v bytes.", v.location, v.size) + expect(&t, false, err_msg) + } + } + + fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) +} + +@test +run_tests :: proc(t: ^testing.T) { + using fmt + + count := 0 + + for test in TESTS { + printf("Trying to parse %v\n\n", test.filename) + + doc, err := xml.parse(test.filename, test.options, Silent) + defer xml.destroy(doc) + + err_msg := tprintf("Expected return value %v, got %v", test.expected.error, err) + expect(t, err == test.expected.error, err_msg) + + if len(test.expected.xml_version) > 0 { + xml_version := "" + for attr in doc.prolog { + if attr.key == "version" { + xml_version = attr.val + } + } + + err_msg = tprintf("Expected XML version %v, got %v", test.expected.xml_version, xml_version) + expect(t, xml_version == test.expected.xml_version, err_msg) + } + + if len(test.expected.xml_encoding) > 0 { + xml_encoding := "" + for attr in doc.prolog { + if attr.key == "encoding" { + xml_encoding = attr.val + } + } + + err_msg = tprintf("Expected XML encoding %v, got %v", test.expected.xml_encoding, xml_encoding) + expect(t, xml_encoding == test.expected.xml_encoding, err_msg) + } + + err_msg = tprintf("Expected DOCTYPE %v, got %v", test.expected.doctype, doc.doctype.ident) + expect(t, doc.doctype.ident == test.expected.doctype, err_msg) + + /* + File-specific tests. + */ + switch count { + case 0: + expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.") + attr := doc.root.attribs[0] + + attr_key_expected := "올빼미_id" + attr_val_expected := "Foozle Hello, world!\"]]>Barzle" + + attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key) + expect(t, attr.key == attr_key_expected, attr_err) + + attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val) + expect(t, attr.val == attr_val_expected, attr_err) + + expect(t, len(doc.root.children) > 0, "Expected the root tag to have children.") + child := doc.root.children[0] + + first_child_ident := "부끄러운:barzle" + attr_err = tprintf("Expected first child tag's ident to be %v, got %v", first_child_ident, child.ident) + expect(t, child.ident == first_child_ident, attr_err) + + case 2: + expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.") + + { + attr := doc.root.attribs[0] + + attr_key_expected := "version" + attr_val_expected := "1.2" + + attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key) + expect(t, attr.key == attr_key_expected, attr_err) + + attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val) + expect(t, attr.val == attr_val_expected, attr_err) + } + + { + attr := doc.root.attribs[1] + + attr_key_expected := "xmlns" + attr_val_expected := "urn:oasis:names:tc:xliff:document:1.2" + + attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key) + expect(t, attr.key == attr_key_expected, attr_err) + + attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val) + expect(t, attr.val == attr_val_expected, attr_err) + } + + case 3: + expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.") + + { + attr := doc.root.attribs[0] + + attr_key_expected := "xmlns" + attr_val_expected := "urn:oasis:names:tc:xliff:document:2.0" + + attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key) + expect(t, attr.key == attr_key_expected, attr_err) + + attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val) + expect(t, attr.val == attr_val_expected, attr_err) + } + + { + attr := doc.root.attribs[1] + + attr_key_expected := "version" + attr_val_expected := "2.0" + + attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key) + expect(t, attr.key == attr_key_expected, attr_err) + + attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val) + expect(t, attr.val == attr_val_expected, attr_err) + } + } + + count += 1 + } +} \ No newline at end of file