mirror of
https://github.com/odin-lang/Odin.git
synced 2026-01-05 04:27:51 +00:00
[xml] Initial implementation of core:encoding/xml.
A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816). Features: - Supports enough of the XML 1.0/1.1 spec to handle the 99.9% of XML documents in common current usage. - Simple to understand and use. Small. Caveats: - We do NOT support HTML in this package, as that may or may not be valid XML. If it works, great. If it doesn't, that's not considered a bug. - We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences. - `<!ELEMENT` and `<!ATTLIST` are not supported, and will be either ignored or return an error depending on the parser options. TODO: - Optional CDATA unboxing. - Optional `&gt;`, `&amp;`, `&#32;` and other escape substitution in tag bodies. - Test suite MAYBE: - XML writer? - Serialize/deserialize Odin types?
This commit is contained in:
73
core/encoding/xml/debug_print.odin
Normal file
73
core/encoding/xml/debug_print.odin
Normal file
@@ -0,0 +1,73 @@
|
||||
package xml
|
||||
/*
|
||||
An XML 1.0 / 1.1 parser
|
||||
|
||||
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
|
||||
Made available under Odin's BSD-3 license.
|
||||
|
||||
A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
|
||||
|
||||
List of contributors:
|
||||
Jeroen van Rijn: Initial implementation.
|
||||
*/
|
||||
import "core:fmt"
|
||||
|
||||
/*
|
||||
Just for debug purposes.
|
||||
*/
|
||||
print :: proc(doc: ^Document) {
|
||||
assert(doc != nil)
|
||||
|
||||
using fmt
|
||||
println("[XML Prolog]")
|
||||
|
||||
for attr in doc.prolog {
|
||||
printf("\t%v: %v\n", attr.key, attr.val)
|
||||
}
|
||||
|
||||
printf("[Encoding] %v\n", doc.encoding)
|
||||
printf("[DOCTYPE] %v\n", doc.doctype.ident)
|
||||
|
||||
if len(doc.doctype.rest) > 0 {
|
||||
printf("\t%v\n", doc.doctype.rest)
|
||||
}
|
||||
|
||||
if doc.root != nil {
|
||||
println(" --- ")
|
||||
print_element(0, doc.root)
|
||||
println(" --- ")
|
||||
}
|
||||
}
|
||||
|
||||
print_element :: proc(indent: int, element: ^Element) {
|
||||
if element == nil { return }
|
||||
using fmt
|
||||
|
||||
tab :: proc(indent: int) {
|
||||
tabs := "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
|
||||
|
||||
i := max(0, min(indent, len(tabs)))
|
||||
printf("%v", tabs[:i])
|
||||
}
|
||||
|
||||
tab(indent)
|
||||
|
||||
if element.kind == .Element {
|
||||
printf("<%v>\n", element.ident)
|
||||
if len(element.value) > 0 {
|
||||
tab(indent + 1)
|
||||
printf("[Value] %v\n", element.value)
|
||||
}
|
||||
|
||||
for attr in element.attribs {
|
||||
tab(indent + 1)
|
||||
printf("[Attr] %v: %v\n", attr.key, attr.val)
|
||||
}
|
||||
|
||||
for child in element.children {
|
||||
print_element(indent + 1, child)
|
||||
}
|
||||
} else if element.kind == .Comment {
|
||||
printf("[COMMENT] %v\n", element.value)
|
||||
}
|
||||
}
|
||||
55
core/encoding/xml/example/xml_example.odin
Normal file
55
core/encoding/xml/example/xml_example.odin
Normal file
@@ -0,0 +1,55 @@
|
||||
package xml_example
|
||||
|
||||
import "core:encoding/xml"
|
||||
import "core:mem"
|
||||
import "core:fmt"
|
||||
|
||||
Error_Handler :: proc(pos: xml.Pos, fmt: string, args: ..any) {
|
||||
|
||||
}
|
||||
|
||||
FILENAME :: "../../../../tests/core/assets/xml/nl_NL-xliff-1.0.xliff"
|
||||
DOC :: #load(FILENAME)
|
||||
|
||||
OPTIONS :: xml.Options{
|
||||
flags = {
|
||||
.Ignore_Unsupported, .Intern_Comments,
|
||||
},
|
||||
expected_doctype = "",
|
||||
}
|
||||
|
||||
_main :: proc() {
|
||||
using fmt
|
||||
|
||||
println("--- DOCUMENT TO PARSE ---")
|
||||
println(string(DOC))
|
||||
println("--- /DOCUMENT TO PARSE ---\n")
|
||||
|
||||
doc, err := xml.parse(DOC, OPTIONS, FILENAME, Error_Handler)
|
||||
defer xml.destroy(doc)
|
||||
|
||||
xml.print(doc)
|
||||
|
||||
if err != .None {
|
||||
printf("Parse error: %v\n", err)
|
||||
} else {
|
||||
println("DONE!")
|
||||
}
|
||||
}
|
||||
|
||||
main :: proc() {
|
||||
using fmt
|
||||
|
||||
track: mem.Tracking_Allocator
|
||||
mem.tracking_allocator_init(&track, context.allocator)
|
||||
context.allocator = mem.tracking_allocator(&track)
|
||||
|
||||
_main()
|
||||
|
||||
if len(track.allocation_map) > 0 {
|
||||
println()
|
||||
for _, v in track.allocation_map {
|
||||
printf("%v Leaked %v bytes.\n", v.location, v.size)
|
||||
}
|
||||
}
|
||||
}
|
||||
339
core/encoding/xml/tokenizer.odin
Normal file
339
core/encoding/xml/tokenizer.odin
Normal file
@@ -0,0 +1,339 @@
|
||||
package xml
|
||||
|
||||
import "core:fmt"
|
||||
import "core:unicode"
|
||||
import "core:unicode/utf8"
|
||||
|
||||
Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any)
|
||||
|
||||
Token :: struct {
|
||||
kind: Token_Kind,
|
||||
text: string,
|
||||
pos: Pos,
|
||||
}
|
||||
|
||||
Pos :: struct {
|
||||
file: string,
|
||||
offset: int, // starting at 0
|
||||
line: int, // starting at 1
|
||||
column: int, // starting at 1
|
||||
}
|
||||
|
||||
Token_Kind :: enum {
|
||||
Invalid,
|
||||
|
||||
Ident,
|
||||
Literal,
|
||||
Rune,
|
||||
String,
|
||||
|
||||
Double_Quote, // "
|
||||
Single_Quote, // '
|
||||
Colon, // :
|
||||
|
||||
Eq, // =
|
||||
Lt, // <
|
||||
Gt, // >
|
||||
Exclaim, // !
|
||||
Question, // ?
|
||||
Hash, // #
|
||||
Slash, // /
|
||||
Dash, // -
|
||||
|
||||
Open_Bracket, // [
|
||||
Close_Bracket, // ]
|
||||
|
||||
EOF,
|
||||
}
|
||||
|
||||
CDATA_START :: "<![CDATA["
|
||||
CDATA_END :: "]]>"
|
||||
|
||||
Tokenizer :: struct {
|
||||
// Immutable data
|
||||
path: string,
|
||||
src: string,
|
||||
err: Error_Handler,
|
||||
|
||||
// Tokenizing state
|
||||
ch: rune,
|
||||
offset: int,
|
||||
read_offset: int,
|
||||
line_offset: int,
|
||||
line_count: int,
|
||||
|
||||
// Mutable data
|
||||
error_count: int,
|
||||
}
|
||||
|
||||
init :: proc(t: ^Tokenizer, src: string, path: string, err: Error_Handler = default_error_handler) {
|
||||
t.src = src
|
||||
t.err = err
|
||||
t.ch = ' '
|
||||
t.offset = 0
|
||||
t.read_offset = 0
|
||||
t.line_offset = 0
|
||||
t.line_count = len(src) > 0 ? 1 : 0
|
||||
t.error_count = 0
|
||||
t.path = path
|
||||
|
||||
advance_rune(t)
|
||||
if t.ch == utf8.RUNE_BOM {
|
||||
advance_rune(t)
|
||||
}
|
||||
}
|
||||
|
||||
@(private)
|
||||
offset_to_pos :: proc(t: ^Tokenizer, offset: int) -> Pos {
|
||||
line := t.line_count
|
||||
column := offset - t.line_offset + 1
|
||||
|
||||
return Pos {
|
||||
file = t.path,
|
||||
offset = offset,
|
||||
line = line,
|
||||
column = column,
|
||||
}
|
||||
}
|
||||
|
||||
default_error_handler :: proc(pos: Pos, msg: string, args: ..any) {
|
||||
fmt.eprintf("%s(%d:%d) ", pos.file, pos.line, pos.column)
|
||||
fmt.eprintf(msg, ..args)
|
||||
fmt.eprintf("\n")
|
||||
}
|
||||
|
||||
error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
|
||||
pos := offset_to_pos(t, offset)
|
||||
if t.err != nil {
|
||||
t.err(pos, msg, ..args)
|
||||
}
|
||||
t.error_count += 1
|
||||
}
|
||||
|
||||
advance_rune :: proc(using t: ^Tokenizer) {
|
||||
if read_offset < len(src) {
|
||||
offset = read_offset
|
||||
if ch == '\n' {
|
||||
line_offset = offset
|
||||
line_count += 1
|
||||
}
|
||||
r, w := rune(src[read_offset]), 1
|
||||
switch {
|
||||
case r == 0:
|
||||
error(t, t.offset, "illegal character NUL")
|
||||
case r >= utf8.RUNE_SELF:
|
||||
r, w = utf8.decode_rune_in_string(src[read_offset:])
|
||||
if r == utf8.RUNE_ERROR && w == 1 {
|
||||
error(t, t.offset, "illegal UTF-8 encoding")
|
||||
} else if r == utf8.RUNE_BOM && offset > 0 {
|
||||
error(t, t.offset, "illegal byte order mark")
|
||||
}
|
||||
}
|
||||
read_offset += w
|
||||
ch = r
|
||||
} else {
|
||||
offset = len(src)
|
||||
if ch == '\n' {
|
||||
line_offset = offset
|
||||
line_count += 1
|
||||
}
|
||||
ch = -1
|
||||
}
|
||||
}
|
||||
|
||||
peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte {
|
||||
if t.read_offset+offset < len(t.src) {
|
||||
return t.src[t.read_offset+offset]
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
skip_whitespace :: proc(t: ^Tokenizer) {
|
||||
for {
|
||||
switch t.ch {
|
||||
case ' ', '\t', '\r', '\n':
|
||||
advance_rune(t)
|
||||
case:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
is_letter :: proc(r: rune) -> bool {
|
||||
if r < utf8.RUNE_SELF {
|
||||
switch r {
|
||||
case '_':
|
||||
return true
|
||||
case 'A'..='Z', 'a'..='z':
|
||||
return true
|
||||
}
|
||||
}
|
||||
return unicode.is_letter(r)
|
||||
}
|
||||
|
||||
is_valid_identifier_rune :: proc(r: rune) -> bool {
|
||||
if r < utf8.RUNE_SELF {
|
||||
switch r {
|
||||
case '_', '-', ':': return true
|
||||
case 'A'..='Z', 'a'..='z': return true
|
||||
case '0'..'9': return true
|
||||
}
|
||||
}
|
||||
|
||||
if unicode.is_digit(r) || unicode.is_letter(r) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
scan_identifier :: proc(t: ^Tokenizer) -> string {
|
||||
offset := t.offset
|
||||
namespaced := false
|
||||
|
||||
for is_valid_identifier_rune(t.ch) {
|
||||
advance_rune(t)
|
||||
if t.ch == ':' {
|
||||
/*
|
||||
A namespaced attr can have at most two parts, `namespace:ident`.
|
||||
*/
|
||||
if namespaced {
|
||||
break
|
||||
}
|
||||
namespaced = true
|
||||
}
|
||||
}
|
||||
return string(t.src[offset : t.offset])
|
||||
}
|
||||
|
||||
scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close := false) -> (value: string, err: Error) {
|
||||
err = .None
|
||||
in_cdata := false
|
||||
|
||||
loop: for {
|
||||
ch := t.ch
|
||||
|
||||
switch ch {
|
||||
case -1:
|
||||
error(t, t.offset, "[scan_string] Premature end of file.\n")
|
||||
return "", .Premature_EOF
|
||||
|
||||
case '<':
|
||||
/*
|
||||
Might be the start of a CDATA tag.
|
||||
*/
|
||||
if t.read_offset + len(CDATA_START) < len(t.src) {
|
||||
if string(t.src[t.offset:][:len(CDATA_START)]) == CDATA_START {
|
||||
in_cdata = true
|
||||
}
|
||||
}
|
||||
|
||||
case ']':
|
||||
/*
|
||||
Might be the end of a CDATA tag.
|
||||
*/
|
||||
if t.read_offset + len(CDATA_END) < len(t.src) {
|
||||
if string(t.src[t.offset:][:len(CDATA_END)]) == CDATA_END {
|
||||
in_cdata = false
|
||||
}
|
||||
}
|
||||
|
||||
case '\n':
|
||||
if !in_cdata {
|
||||
error(t, offset, string(t.src[offset : t.offset]))
|
||||
error(t, offset, "[scan_string] Not terminated\n")
|
||||
err = .Invalid_Tag_Value
|
||||
break loop
|
||||
}
|
||||
}
|
||||
|
||||
if ch == close && !in_cdata {
|
||||
/*
|
||||
If it's not a CDATA tag, it's the end of this body.
|
||||
*/
|
||||
break loop
|
||||
}
|
||||
|
||||
advance_rune(t)
|
||||
}
|
||||
|
||||
lit := string(t.src[offset : t.offset])
|
||||
if consume_close {
|
||||
advance_rune(t)
|
||||
}
|
||||
|
||||
/*
|
||||
TODO: Handle decoding escape characters and unboxing CDATA.
|
||||
*/
|
||||
|
||||
return lit, err
|
||||
}
|
||||
|
||||
peek :: proc(t: ^Tokenizer) -> (token: Token) {
|
||||
old := t^
|
||||
token = scan(t)
|
||||
t^ = old
|
||||
return token
|
||||
}
|
||||
|
||||
scan :: proc(t: ^Tokenizer) -> Token {
|
||||
skip_whitespace(t)
|
||||
|
||||
offset := t.offset
|
||||
|
||||
kind: Token_Kind
|
||||
err: Error
|
||||
lit: string
|
||||
pos := offset_to_pos(t, offset)
|
||||
|
||||
switch ch := t.ch; true {
|
||||
case is_letter(ch):
|
||||
lit = scan_identifier(t)
|
||||
kind = .Ident
|
||||
|
||||
case:
|
||||
advance_rune(t)
|
||||
switch ch {
|
||||
case -1:
|
||||
kind = .EOF
|
||||
|
||||
case '<': kind = .Lt
|
||||
case '>': kind = .Gt
|
||||
case '!': kind = .Exclaim
|
||||
case '?': kind = .Question
|
||||
case '=': kind = .Eq
|
||||
case '#': kind = .Hash
|
||||
case '/': kind = .Slash
|
||||
case '-': kind = .Dash
|
||||
case ':': kind = .Colon
|
||||
|
||||
case '"', '\'':
|
||||
lit, err = scan_string(t, t.offset, ch, true)
|
||||
if err == .None {
|
||||
kind = .String
|
||||
} else {
|
||||
kind = .Invalid
|
||||
}
|
||||
|
||||
case '\n':
|
||||
lit = "\n"
|
||||
|
||||
case '\\':
|
||||
token := scan(t)
|
||||
if token.pos.line == pos.line {
|
||||
error(t, token.pos.offset, "expected a newline after \\")
|
||||
}
|
||||
return token
|
||||
|
||||
case:
|
||||
if ch != utf8.RUNE_BOM {
|
||||
// error(t, t.offset, "illegal character '%r': %d", ch, ch)
|
||||
}
|
||||
kind = .Invalid
|
||||
}
|
||||
}
|
||||
|
||||
if lit == "" {
|
||||
lit = string(t.src[offset : t.offset])
|
||||
}
|
||||
return Token{kind, lit, pos}
|
||||
}
|
||||
651
core/encoding/xml/xml_reader.odin
Normal file
651
core/encoding/xml/xml_reader.odin
Normal file
@@ -0,0 +1,651 @@
|
||||
package xml
|
||||
/*
|
||||
An XML 1.0 / 1.1 parser
|
||||
|
||||
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
|
||||
Made available under Odin's BSD-3 license.
|
||||
|
||||
A from-scratch XML implementation, loosely modelled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
|
||||
|
||||
Features:
|
||||
- Supports enough of the XML 1.0/1.1 spec to handle the 99.9% of XML documents in common current usage.
|
||||
- Simple to understand and use. Small.
|
||||
|
||||
Caveats:
|
||||
- We do NOT support HTML in this package, as that may or may not be valid XML.
|
||||
If it works, great. If it doesn't, that's not considered a bug.
|
||||
|
||||
- We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences.
|
||||
- <[!ELEMENT and <[!ATTLIST are not supported, and will be either ignored or return an error depending on the parser options.
|
||||
|
||||
TODO:
|
||||
- Optional CDATA unboxing.
|
||||
- Optional `>`, ` `, ` ` and other escape substitution in tag bodies.
|
||||
|
||||
MAYBE:
|
||||
- XML writer?
|
||||
- Serialize/deserialize Odin types?
|
||||
|
||||
List of contributors:
|
||||
Jeroen van Rijn: Initial implementation.
|
||||
*/
|
||||
|
||||
import "core:strings"
|
||||
import "core:mem"
|
||||
import "core:os"
|
||||
|
||||
DEFAULT_Options :: Options{
|
||||
flags = {
|
||||
.Ignore_Unsupported,
|
||||
},
|
||||
expected_doctype = "",
|
||||
}
|
||||
|
||||
Option_Flag :: enum {
|
||||
/*
|
||||
Document MUST start with `<?xml` prolog.
|
||||
*/
|
||||
Must_Have_Prolog,
|
||||
|
||||
/*
|
||||
Document MUST have a `<!DOCTYPE`.
|
||||
*/
|
||||
Must_Have_DocType,
|
||||
|
||||
/*
|
||||
By default we skip comments. Use this option to intern a comment on a parented Element.
|
||||
*/
|
||||
Intern_Comments,
|
||||
|
||||
/*
|
||||
How to handle unsupported parts of the specification, like <! other than <!DOCTYPE and <![CDATA[
|
||||
*/
|
||||
Error_on_Unsupported,
|
||||
Ignore_Unsupported,
|
||||
|
||||
/*
|
||||
By default CDATA tags are passed-through as-is.
|
||||
This option unwraps them when encountered.
|
||||
*/
|
||||
Unbox_CDATA,
|
||||
|
||||
/*
|
||||
By default SGML entities like `>`, ` ` and ` ` are passed-through as-is.
|
||||
This option decodes them when encountered.
|
||||
*/
|
||||
Decode_SGML_Entities,
|
||||
}
|
||||
|
||||
Document :: struct {
|
||||
root: ^Element,
|
||||
prolog: Attributes,
|
||||
encoding: Encoding,
|
||||
|
||||
doctype: struct {
|
||||
/*
|
||||
We only scan the <!DOCTYPE IDENT part and skip the rest.
|
||||
*/
|
||||
ident: string,
|
||||
rest: string,
|
||||
},
|
||||
|
||||
/*
|
||||
Internal
|
||||
*/
|
||||
tokenizer: ^Tokenizer,
|
||||
allocator: mem.Allocator,
|
||||
intern: strings.Intern,
|
||||
}
|
||||
|
||||
Element :: struct {
|
||||
ident: string,
|
||||
value: string,
|
||||
attribs: Attributes,
|
||||
|
||||
kind: enum {
|
||||
Element = 0,
|
||||
Comment,
|
||||
},
|
||||
|
||||
parent: ^Element,
|
||||
children: [dynamic]^Element,
|
||||
}
|
||||
|
||||
Attr :: struct {
|
||||
key: string,
|
||||
val: string,
|
||||
}
|
||||
|
||||
Attributes :: [dynamic]Attr
|
||||
|
||||
Options :: struct {
|
||||
flags: Option_Flags,
|
||||
expected_doctype: string,
|
||||
}
|
||||
Option_Flags :: bit_set[Option_Flag]
|
||||
|
||||
Encoding :: enum {
|
||||
Unknown,
|
||||
|
||||
UTF_8,
|
||||
ISO_8859_1,
|
||||
|
||||
/*
|
||||
Aliases
|
||||
*/
|
||||
LATIN_1 = ISO_8859_1,
|
||||
}
|
||||
|
||||
Error :: enum {
|
||||
/*
|
||||
General return values.
|
||||
*/
|
||||
None = 0,
|
||||
General_Error,
|
||||
Unexpected_Token,
|
||||
Invalid_Token,
|
||||
|
||||
/*
|
||||
Couldn't find, open or read file.
|
||||
*/
|
||||
File_Error,
|
||||
|
||||
/*
|
||||
File too short.
|
||||
*/
|
||||
Premature_EOF,
|
||||
|
||||
/*
|
||||
XML-specific errors.
|
||||
*/
|
||||
No_Prolog,
|
||||
Invalid_Prolog,
|
||||
Too_Many_Prologs,
|
||||
|
||||
No_DocType,
|
||||
Too_Many_DocTypes,
|
||||
DocType_Must_Proceed_Elements,
|
||||
|
||||
/*
|
||||
If a DOCTYPE is present _or_ the caller
|
||||
asked for a specific DOCTYPE and the DOCTYPE
|
||||
and root tag don't match, we return `.Invalid_DocType`.
|
||||
*/
|
||||
Invalid_DocType,
|
||||
|
||||
Invalid_Tag_Value,
|
||||
Mismatched_Closing_Tag,
|
||||
|
||||
Unclosed_Comment,
|
||||
Comment_Before_Root_Element,
|
||||
Invalid_Sequence_In_Comment,
|
||||
|
||||
Unsupported_Version,
|
||||
Unsupported_Encoding,
|
||||
|
||||
/*
|
||||
<!FOO are usually skipped.
|
||||
*/
|
||||
Unhandled_Bang,
|
||||
|
||||
Duplicate_Attribute,
|
||||
Conflicting_Options,
|
||||
|
||||
/*
|
||||
Unhandled TODO:
|
||||
*/
|
||||
Unhandled_CDATA_Unboxing,
|
||||
Unhandled_SGML_Entity_Decoding,
|
||||
}
|
||||
|
||||
/*
|
||||
Implementation starts here.
|
||||
*/
|
||||
parse_from_slice :: proc(data: []u8, options := DEFAULT_Options, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
|
||||
context.allocator = allocator
|
||||
|
||||
opts := validate_options(options) or_return
|
||||
|
||||
t := &Tokenizer{}
|
||||
init(t, string(data), path, error_handler)
|
||||
|
||||
doc = new(Document)
|
||||
doc.allocator = allocator
|
||||
doc.tokenizer = t
|
||||
|
||||
strings.intern_init(&doc.intern, allocator, allocator)
|
||||
|
||||
err = .Unexpected_Token
|
||||
element, parent: ^Element
|
||||
|
||||
/*
|
||||
If a DOCTYPE is present, the root tag has to match.
|
||||
If an expected DOCTYPE is given in options (i.e. it's non-empty), the DOCTYPE (if present) and root tag have to match.
|
||||
*/
|
||||
expected_doctype := options.expected_doctype
|
||||
|
||||
loop: for {
|
||||
tok := scan(t)
|
||||
#partial switch tok.kind {
|
||||
|
||||
case .Lt:
|
||||
open := scan(t)
|
||||
#partial switch open.kind {
|
||||
|
||||
case .Question:
|
||||
/*
|
||||
<?xml
|
||||
*/
|
||||
next := scan(t)
|
||||
#partial switch next.kind {
|
||||
case .Ident:
|
||||
if len(next.text) == 3 && strings.to_lower(next.text, context.temp_allocator) == "xml" {
|
||||
parse_prolog(doc) or_return
|
||||
} else if len(doc.prolog) > 0 {
|
||||
/*
|
||||
We've already seen a prolog.
|
||||
*/
|
||||
return doc, .Too_Many_Prologs
|
||||
} else {
|
||||
error(t, t.offset, "Expected \"<?xml\", got \"<?%v\".", tok.text)
|
||||
return
|
||||
}
|
||||
case:
|
||||
error(t, t.offset, "Expected \"<?xml\", got \"<?%v\".", tok.text)
|
||||
return
|
||||
}
|
||||
|
||||
case .Exclaim:
|
||||
/*
|
||||
<!
|
||||
*/
|
||||
next := scan(t)
|
||||
#partial switch next.kind {
|
||||
case .Ident:
|
||||
switch next.text {
|
||||
case "DOCTYPE":
|
||||
if len(doc.doctype.ident) > 0 {
|
||||
return doc, .Too_Many_DocTypes
|
||||
}
|
||||
if doc.root != nil {
|
||||
return doc, .DocType_Must_Proceed_Elements
|
||||
}
|
||||
parse_doctype(doc) or_return
|
||||
|
||||
if len(expected_doctype) > 0 && expected_doctype != doc.doctype.ident {
|
||||
error(t, t.offset, "Invalid DOCTYPE. Expected: %v, got: %v\n", expected_doctype, doc.doctype.ident)
|
||||
return doc, .Invalid_DocType
|
||||
}
|
||||
expected_doctype = doc.doctype.ident
|
||||
|
||||
case:
|
||||
if .Error_on_Unsupported in opts.flags {
|
||||
error(t, t.offset, "Unhandled: <!%v\n", next.text)
|
||||
err = .Unhandled_Bang
|
||||
return
|
||||
}
|
||||
skip_element(t) or_return
|
||||
}
|
||||
|
||||
case .Dash:
|
||||
/*
|
||||
Comment: <!-- -->.
|
||||
The grammar does not allow a comment to end in --->
|
||||
*/
|
||||
if doc.root == nil {
|
||||
return doc, .Comment_Before_Root_Element
|
||||
}
|
||||
|
||||
expect(t, .Dash)
|
||||
offset := t.offset
|
||||
|
||||
for {
|
||||
advance_rune(t)
|
||||
ch := t.ch
|
||||
|
||||
/*
|
||||
A comment ends when we see -->, preceded by a character that's not a dash.
|
||||
"For compatibility, the string "--" (double-hyphen) must not occur within comments."
|
||||
|
||||
See: https://www.w3.org/TR/2006/REC-xml11-20060816/#dt-comment
|
||||
|
||||
Thanks to the length (4) of the comment start, we also have enough lookback,
|
||||
and the peek at the next byte asserts that there's at least one more character
|
||||
that's a `>`.
|
||||
*/
|
||||
if ch < 0 {
|
||||
error(t, offset, "[parse] Comment was not terminated\n")
|
||||
return doc, .Unclosed_Comment
|
||||
}
|
||||
|
||||
if string(t.src[t.offset - 1:][:2]) == "--" {
|
||||
if peek_byte(t) == '>' {
|
||||
break
|
||||
} else {
|
||||
error(t, t.offset - 1, "Invalid -- sequence in comment.\n")
|
||||
return doc, .Invalid_Sequence_In_Comment
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if .Intern_Comments in opts.flags {
|
||||
el := new(Element)
|
||||
|
||||
el.parent = element
|
||||
el.kind = .Comment
|
||||
el.value = strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1]))
|
||||
append(&element.children, el)
|
||||
}
|
||||
|
||||
expect(t, .Dash)
|
||||
expect(t, .Gt)
|
||||
|
||||
case:
|
||||
error(t, t.offset, "Invalid Token after <!. Expected .Ident, got %#v\n", next)
|
||||
return
|
||||
}
|
||||
|
||||
case .Ident:
|
||||
/*
|
||||
e.g. <odin - Start of new element.
|
||||
*/
|
||||
element = new(Element)
|
||||
|
||||
if doc.root == nil {
|
||||
/*
|
||||
First element.
|
||||
*/
|
||||
doc.root = element
|
||||
parent = element
|
||||
} else {
|
||||
append(&parent.children, element)
|
||||
}
|
||||
|
||||
element.parent = parent
|
||||
element.ident = strings.intern_get(&doc.intern, open.text)
|
||||
|
||||
parse_attributes(doc, &element.attribs) or_return
|
||||
|
||||
/*
|
||||
If a DOCTYPE is present _or_ the caller
|
||||
asked for a specific DOCTYPE and the DOCTYPE
|
||||
and root tag don't match, we return .Invalid_Root_Tag.
|
||||
*/
|
||||
if element == doc.root {
|
||||
if len(expected_doctype) > 0 && expected_doctype != open.text {
|
||||
error(t, t.offset, "Root Tag doesn't match DOCTYPE. Expected: %v, got: %v\n", expected_doctype, open.text)
|
||||
return doc, .Invalid_DocType
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
One of these should follow:
|
||||
- `>`, which means we've just opened this tag and expect a later element to close it.
|
||||
- `/>`, which means this is an 'empty' or self-closing tag.
|
||||
*/
|
||||
end_token := scan(t)
|
||||
|
||||
#partial switch end_token.kind {
|
||||
case .Gt:
|
||||
/*
|
||||
We're now the new parent.
|
||||
*/
|
||||
parent = element
|
||||
|
||||
case .Slash:
|
||||
/*
|
||||
Empty tag?
|
||||
*/
|
||||
expect(t, .Gt) or_return
|
||||
|
||||
case:
|
||||
error(t, t.offset, "Expected close tag, got: %#v\n", end_token)
|
||||
return
|
||||
}
|
||||
|
||||
case .Slash:
|
||||
/*
|
||||
Close tag.
|
||||
*/
|
||||
ident := expect(t, .Ident) or_return
|
||||
_ = expect(t, .Gt) or_return
|
||||
|
||||
if element.ident != ident.text {
|
||||
error(t, t.offset, "Mismatched Closing Tag: %v\n", ident.text)
|
||||
return doc, .Mismatched_Closing_Tag
|
||||
}
|
||||
parent = element.parent
|
||||
element = parent
|
||||
|
||||
case:
|
||||
error(t, t.offset, "Invalid Token after <: %#v\n", open)
|
||||
return
|
||||
}
|
||||
|
||||
case .EOF:
|
||||
break loop
|
||||
|
||||
case:
|
||||
/*
|
||||
This should be a tag's body text.
|
||||
*/
|
||||
element.value = scan_string(t, tok.pos.offset) or_return
|
||||
}
|
||||
}
|
||||
|
||||
if .Must_Have_Prolog in opts.flags && len(doc.prolog) == 0 {
|
||||
return doc, .No_Prolog
|
||||
}
|
||||
|
||||
if .Must_Have_DocType in opts.flags && len(doc.doctype.ident) == 0 {
|
||||
return doc, .No_DocType
|
||||
}
|
||||
|
||||
return doc, .None
|
||||
}
|
||||
|
||||
parse_from_file :: proc(filename: string, options := DEFAULT_Options, error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
|
||||
context.allocator = allocator
|
||||
|
||||
data, data_ok := os.read_entire_file(filename)
|
||||
defer delete(data)
|
||||
|
||||
if !data_ok { return {}, .File_Error }
|
||||
|
||||
return parse_from_slice(data, options, filename, error_handler, allocator)
|
||||
}
|
||||
|
||||
parse :: proc { parse_from_file, parse_from_slice }
|
||||
|
||||
free_element :: proc(element: ^Element) {
|
||||
if element == nil { return }
|
||||
|
||||
for child in element.children {
|
||||
/*
|
||||
NOTE: Recursive.
|
||||
|
||||
Could be rewritten so it adds them to a list of pointers to free.
|
||||
*/
|
||||
free_element(child)
|
||||
}
|
||||
delete(element.attribs)
|
||||
delete(element.children)
|
||||
free(element)
|
||||
}
|
||||
|
||||
destroy :: proc(doc: ^Document) {
|
||||
if doc == nil { return }
|
||||
|
||||
free_element(doc.root)
|
||||
strings.intern_destroy(&doc.intern)
|
||||
|
||||
delete(doc.prolog)
|
||||
free(doc)
|
||||
}
|
||||
|
||||
/*
|
||||
Helpers.
|
||||
*/
|
||||
|
||||
validate_options :: proc(options: Options) -> (validated: Options, err: Error) {
|
||||
validated = options
|
||||
|
||||
if .Error_on_Unsupported in validated.flags && .Ignore_Unsupported in validated.flags {
|
||||
return options, .Conflicting_Options
|
||||
}
|
||||
|
||||
if .Unbox_CDATA in validated.flags {
|
||||
return options, .Unhandled_CDATA_Unboxing
|
||||
}
|
||||
|
||||
if .Decode_SGML_Entities in validated.flags {
|
||||
return options, .Unhandled_SGML_Entity_Decoding
|
||||
}
|
||||
|
||||
return validated, .None
|
||||
}
|
||||
|
||||
expect :: proc(t: ^Tokenizer, kind: Token_Kind) -> (tok: Token, err: Error) {
|
||||
tok = scan(t)
|
||||
if tok.kind == kind { return tok, .None }
|
||||
|
||||
error(t, t.offset, "Expected \"%v\", got \"%v\".", kind, tok.kind)
|
||||
return tok, .Unexpected_Token
|
||||
}
|
||||
|
||||
parse_attribute :: proc(doc: ^Document) -> (attr: Attr, offset: int, err: Error) {
|
||||
assert(doc != nil)
|
||||
context.allocator = doc.allocator
|
||||
t := doc.tokenizer
|
||||
|
||||
key := expect(t, .Ident) or_return
|
||||
offset = t.offset - len(key.text)
|
||||
|
||||
_ = expect(t, .Eq) or_return
|
||||
value := expect(t, .String) or_return
|
||||
|
||||
attr.key = strings.intern_get(&doc.intern, key.text)
|
||||
attr.val = strings.intern_get(&doc.intern, value.text)
|
||||
|
||||
err = .None
|
||||
return
|
||||
}
|
||||
|
||||
check_duplicate_attributes :: proc(t: ^Tokenizer, attribs: Attributes, attr: Attr, offset: int) -> (err: Error) {
|
||||
for a in attribs {
|
||||
if attr.key == a.key {
|
||||
error(t, offset, "Duplicate attribute: %v\n", attr.key)
|
||||
return .Duplicate_Attribute
|
||||
}
|
||||
}
|
||||
return .None
|
||||
}
|
||||
|
||||
parse_attributes :: proc(doc: ^Document, attribs: ^Attributes) -> (err: Error) {
|
||||
assert(doc != nil)
|
||||
context.allocator = doc.allocator
|
||||
t := doc.tokenizer
|
||||
|
||||
for peek(t).kind == .Ident {
|
||||
attr, offset := parse_attribute(doc) or_return
|
||||
check_duplicate_attributes(t, attribs^, attr, offset) or_return
|
||||
append(attribs, attr)
|
||||
}
|
||||
skip_whitespace(t)
|
||||
return .None
|
||||
}
|
||||
|
||||
parse_prolog :: proc(doc: ^Document) -> (err: Error) {
|
||||
assert(doc != nil)
|
||||
context.allocator = doc.allocator
|
||||
t := doc.tokenizer
|
||||
|
||||
offset := t.offset
|
||||
parse_attributes(doc, &doc.prolog) or_return
|
||||
|
||||
for attr in doc.prolog {
|
||||
switch attr.key {
|
||||
case "version":
|
||||
switch attr.val {
|
||||
case "1.0", "1.1":
|
||||
case:
|
||||
error(t, offset, "[parse_prolog] Warning: Unhandled XML version: %v\n", attr.val)
|
||||
}
|
||||
|
||||
case "encoding":
|
||||
switch strings.to_lower(attr.val, context.temp_allocator) {
|
||||
case "utf-8", "utf8":
|
||||
doc.encoding = .UTF_8
|
||||
|
||||
case "latin-1", "latin1", "iso-8859-1":
|
||||
doc.encoding = .LATIN_1
|
||||
|
||||
case:
|
||||
/*
|
||||
Unrecognized encoding, assume UTF-8.
|
||||
*/
|
||||
error(t, offset, "[parse_prolog] Warning: Unrecognized encoding: %v\n", attr.val)
|
||||
}
|
||||
|
||||
case:
|
||||
// Ignored.
|
||||
}
|
||||
}
|
||||
|
||||
_ = expect(t, .Question) or_return
|
||||
_ = expect(t, .Gt) or_return
|
||||
|
||||
return .None
|
||||
}
|
||||
|
||||
skip_element :: proc(t: ^Tokenizer) -> (err: Error) {
|
||||
close := 1
|
||||
|
||||
loop: for {
|
||||
tok := scan(t)
|
||||
#partial switch tok.kind {
|
||||
case .EOF:
|
||||
error(t, t.offset, "[skip_element] Premature EOF\n")
|
||||
return .Premature_EOF
|
||||
|
||||
case .Lt:
|
||||
close += 1
|
||||
|
||||
case .Gt:
|
||||
close -= 1
|
||||
if close == 0 {
|
||||
break loop
|
||||
}
|
||||
|
||||
case:
|
||||
|
||||
}
|
||||
}
|
||||
return .None
|
||||
}
|
||||
|
||||
parse_doctype :: proc(doc: ^Document) -> (err: Error) {
|
||||
/*
|
||||
<!DOCTYPE greeting SYSTEM "hello.dtd">
|
||||
|
||||
<!DOCTYPE greeting [
|
||||
<!ELEMENT greeting (#PCDATA)>
|
||||
]>
|
||||
*/
|
||||
assert(doc != nil)
|
||||
context.allocator = doc.allocator
|
||||
t := doc.tokenizer
|
||||
|
||||
tok := expect(t, .Ident) or_return
|
||||
doc.doctype.ident = strings.intern_get(&doc.intern, tok.text)
|
||||
|
||||
skip_whitespace(t)
|
||||
offset := t.offset
|
||||
skip_element(t) or_return
|
||||
|
||||
/*
|
||||
-1 because the current offset is that of the closing tag, so the rest of the DOCTYPE tag ends just before it.
|
||||
*/
|
||||
doc.doctype.rest = strings.intern_get(&doc.intern, string(t.src[offset : t.offset - 1]))
|
||||
return .None
|
||||
}
|
||||
@@ -1,22 +1,29 @@
|
||||
ODIN=../../odin
|
||||
PYTHON=$(shell which python3)
|
||||
|
||||
all: download_test_assets image_test compress_test strings_test hash_test crypto_test
|
||||
all: download_test_assets image_test compress_test strings_test hash_test crypto_test encoding_test
|
||||
|
||||
download_test_assets:
|
||||
$(PYTHON) download_assets.py
|
||||
|
||||
image_test:
|
||||
$(ODIN) run image/test_core_image.odin
|
||||
$(ODIN) run image/test_core_image.odin -out=test_image -o:speed -no-bounds-check
|
||||
|
||||
compress_test:
|
||||
$(ODIN) run compress/test_core_compress.odin
|
||||
$(ODIN) run compress/test_core_compress.odin -out=test_compress -o:speed -no-bounds-check
|
||||
|
||||
strings_test:
|
||||
$(ODIN) run strings/test_core_strings.odin
|
||||
$(ODIN) run strings/test_core_strings.odin -out=test_strings -o:speed -no-bounds-check
|
||||
|
||||
odin_test:
|
||||
$(ODIN) run odin -out=test_odin -o:speed -no-bounds-check
|
||||
|
||||
hash_test:
|
||||
$(ODIN) run hash -out=test_hash -o:speed -no-bounds-check
|
||||
|
||||
crypto_test:
|
||||
$(ODIN) run crypto -out=crypto_hash -o:speed -no-bounds-check
|
||||
$(ODIN) run crypto -out=crypto_hash -o:speed -no-bounds-check
|
||||
|
||||
encoding_test:
|
||||
$(ODIN) run encoding/json -out=test_encoding_json -o:speed -no-bounds-check
|
||||
$(ODIN) run encoding/xml -out=test_encoding_xml -o:speed -no-bounds-check
|
||||
|
||||
35
tests/core/assets/xml/nl_NL-qt-ts.ts
Normal file
35
tests/core/assets/xml/nl_NL-qt-ts.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE TS>
|
||||
<TS version="2.1" language="nl" sourcelanguage="en">
|
||||
<context>
|
||||
<name>Page</name>
|
||||
<message>
|
||||
<source>Text for translation</source>
|
||||
<comment>commenting</comment>
|
||||
<translation type="obsolete">Tekst om te vertalen</translation>
|
||||
</message>
|
||||
<message>
|
||||
<source>Also text to translate</source>
|
||||
<extracomment>some text</extracomment>
|
||||
<translation>Ook tekst om te vertalen</translation>
|
||||
</message>
|
||||
</context>
|
||||
<context>
|
||||
<name>installscript</name>
|
||||
<message>
|
||||
<source>99 bottles of beer on the wall</source>
|
||||
<oldcomment>some new comments here</oldcomment>
|
||||
<translation>99 flessen bier op de muur</translation>
|
||||
</message>
|
||||
</context>
|
||||
<context>
|
||||
<name>apple_count</name>
|
||||
<message numerus="yes">
|
||||
<source>%d apple(s)</source>
|
||||
<translation>
|
||||
<numerusform>%d appel</numerusform>
|
||||
<numerusform>%d appels</numerusform>
|
||||
</translation>
|
||||
</message>
|
||||
</context>
|
||||
</TS>
|
||||
38
tests/core/assets/xml/nl_NL-xliff-1.0.xliff
Normal file
38
tests/core/assets/xml/nl_NL-xliff-1.0.xliff
Normal file
@@ -0,0 +1,38 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<xliff version="1.2" xmlns="urn:oasis:names:tc:xliff:document:1.2">
|
||||
<file id="42" original="Foozle.xml" source-language="en" target-language="nl-NL" datatype="plaintext">
|
||||
<body>
|
||||
<trans-unit id="874396" maxwidth="20" size-unit="char">
|
||||
<source>text</source>
|
||||
<target state="translated">tekst</target>
|
||||
<note>Context</note>
|
||||
</trans-unit>
|
||||
<trans-unit id="874397" approved="yes">
|
||||
<source>text 1</source>
|
||||
<target state="translated">tekst 1</target>
|
||||
<note>Context 1</note>
|
||||
</trans-unit>
|
||||
<trans-unit id="874398">
|
||||
<source>text 2</source>
|
||||
<target state="needs-translation"/>
|
||||
<context context-type="context">Context of the segment 2</context>
|
||||
</trans-unit>
|
||||
<trans-unit id="874399" translate="no">
|
||||
<source>text 3</source>
|
||||
<target state="final">translation 3</target>
|
||||
<note>Context 3</note>
|
||||
</trans-unit>
|
||||
<group restype="x-gettext-plurals">
|
||||
<note>Plurals</note>
|
||||
<trans-unit id="14343743[0]">
|
||||
<source>%d month</source>
|
||||
<target xml:lang="nl" state="translated">%d maand</target>
|
||||
</trans-unit>
|
||||
<trans-unit id="14343743[1]">
|
||||
<source>%d months</source>
|
||||
<target xml:lang="nl" state="translated">%d maanden</target>
|
||||
</trans-unit>
|
||||
</group>
|
||||
</body>
|
||||
</file>
|
||||
</xliff>
|
||||
52
tests/core/assets/xml/nl_NL-xliff-2.0.xliff
Normal file
52
tests/core/assets/xml/nl_NL-xliff-2.0.xliff
Normal file
@@ -0,0 +1,52 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<xliff xmlns="urn:oasis:names:tc:xliff:document:2.0" version="2.0" srcLang="en" trgLang="nl">
|
||||
<file id="f1">
|
||||
<notes>
|
||||
<note id="n1">Note for file</note>
|
||||
</notes>
|
||||
<unit id="u1">
|
||||
<notes>
|
||||
<note id="n1">Note for unit</note>
|
||||
</notes>
|
||||
<segment id="s1" state="initial">
|
||||
<source>text</source>
|
||||
<target></target>
|
||||
</segment>
|
||||
</unit>
|
||||
<unit id="u2">
|
||||
<notes>
|
||||
<note id="n2">Note for unit 2</note>
|
||||
</notes>
|
||||
<segment id="s2" state="translated">
|
||||
<source>text 2</source>
|
||||
<target>translation 2</target>
|
||||
</segment>
|
||||
</unit>
|
||||
<unit id="u3">
|
||||
<notes>
|
||||
<note id="n3">Note for unit 3</note>
|
||||
</notes>
|
||||
<segment id="s3" state="final">
|
||||
<source>text 3</source>
|
||||
<target>approved translation 3</target>
|
||||
</segment>
|
||||
</unit>
|
||||
<group id="90290" type="x-gettext:plurals">
|
||||
<unit id="90291" name="90290[0]">
|
||||
<notes>
|
||||
<note category="context">Plurals</note>
|
||||
</notes>
|
||||
<segment>
|
||||
<source>%d month</source>
|
||||
<target xml:lang="nl">%d maand</target>
|
||||
</segment>
|
||||
</unit>
|
||||
<unit id="90292" name="90290[1]">
|
||||
<segment>
|
||||
<source>%d months</source>
|
||||
<target xml:lang="nl">%d maanden</target>
|
||||
</segment>
|
||||
</unit>
|
||||
</group>
|
||||
</file>
|
||||
</xliff>
|
||||
8
tests/core/assets/xml/utf8.xml
Normal file
8
tests/core/assets/xml/utf8.xml
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<!DOCTYPE 恥ずべきフクロウ>
|
||||
<恥ずべきフクロウ 올빼미_id="Foozle <![CDATA[<greeting>Hello, world!"</greeting>]]>Barzle">
|
||||
<부끄러운:barzle>
|
||||
<name foo:bar="birmese">ရှက်စရာ ဇီးကွက်</name>
|
||||
<nickname>Owl of Shame</nickname>
|
||||
<data>More CDATA <![CDATA[<greeting>Hello, world!</greeting><![CDATA] < ]]> Nonsense.</data>
|
||||
</부끄러운:barzle>
|
||||
@@ -5,34 +5,35 @@ python3 download_assets.py
|
||||
echo ---
|
||||
echo Running core:image tests
|
||||
echo ---
|
||||
%PATH_TO_ODIN% run image %COMMON%
|
||||
%PATH_TO_ODIN% run image %COMMON% -out:test_image.exe
|
||||
|
||||
echo ---
|
||||
echo Running core:compress tests
|
||||
echo ---
|
||||
%PATH_TO_ODIN% run compress %COMMON%
|
||||
%PATH_TO_ODIN% run compress %COMMON% -out:test_compress.exe
|
||||
|
||||
echo ---
|
||||
echo Running core:strings tests
|
||||
echo ---
|
||||
%PATH_TO_ODIN% run strings %COMMON%
|
||||
%PATH_TO_ODIN% run strings %COMMON% -out:test_strings.exe
|
||||
|
||||
echo ---
|
||||
echo Running core:hash tests
|
||||
echo ---
|
||||
%PATH_TO_ODIN% run hash %COMMON% -o:size
|
||||
%PATH_TO_ODIN% run hash %COMMON% -o:size -out:test_hash.exe
|
||||
|
||||
echo ---
|
||||
echo Running core:odin tests
|
||||
echo ---
|
||||
%PATH_TO_ODIN% run odin %COMMON% -o:size
|
||||
%PATH_TO_ODIN% run odin %COMMON% -o:size -out:test_odin.exe
|
||||
|
||||
echo ---
|
||||
echo Running core:crypto hash tests
|
||||
echo ---
|
||||
%PATH_TO_ODIN% run crypto %COMMON%
|
||||
%PATH_TO_ODIN% run crypto %COMMON% -o:speed -out:test_crypto.exe
|
||||
|
||||
echo ---
|
||||
echo Running core:encoding tests
|
||||
echo ---
|
||||
%PATH_TO_ODIN% run encoding %COMMON%
|
||||
%PATH_TO_ODIN% run encoding\json %COMMON% -out:test_json.exe
|
||||
%PATH_TO_ODIN% run encoding\xml %COMMON% -out:test_xml.exe
|
||||
@@ -8,32 +8,32 @@ TEST_count := 0
|
||||
TEST_fail := 0
|
||||
|
||||
when ODIN_TEST {
|
||||
expect :: testing.expect
|
||||
log :: testing.log
|
||||
expect :: testing.expect
|
||||
log :: testing.log
|
||||
} else {
|
||||
expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
|
||||
fmt.printf("[%v] ", loc)
|
||||
TEST_count += 1
|
||||
if !condition {
|
||||
TEST_fail += 1
|
||||
fmt.println(message)
|
||||
return
|
||||
}
|
||||
fmt.println(" PASS")
|
||||
}
|
||||
log :: proc(t: ^testing.T, v: any, loc := #caller_location) {
|
||||
fmt.printf("[%v] ", loc)
|
||||
fmt.printf("log: %v\n", v)
|
||||
}
|
||||
expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
|
||||
fmt.printf("[%v] ", loc)
|
||||
TEST_count += 1
|
||||
if !condition {
|
||||
TEST_fail += 1
|
||||
fmt.println(message)
|
||||
return
|
||||
}
|
||||
fmt.println(" PASS")
|
||||
}
|
||||
log :: proc(t: ^testing.T, v: any, loc := #caller_location) {
|
||||
fmt.printf("[%v] ", loc)
|
||||
fmt.printf("log: %v\n", v)
|
||||
}
|
||||
}
|
||||
|
||||
main :: proc() {
|
||||
t := testing.T{}
|
||||
t := testing.T{}
|
||||
|
||||
parse_json(&t)
|
||||
marshal_json(&t)
|
||||
|
||||
fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
|
||||
fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
|
||||
}
|
||||
|
||||
@test
|
||||
264
tests/core/encoding/xml/test_core_xml.odin
Normal file
264
tests/core/encoding/xml/test_core_xml.odin
Normal file
@@ -0,0 +1,264 @@
|
||||
package test_core_xml
|
||||
|
||||
import "core:encoding/xml"
|
||||
import "core:testing"
|
||||
import "core:mem"
|
||||
import "core:fmt"
|
||||
|
||||
Silent :: proc(pos: xml.Pos, fmt: string, args: ..any) {
|
||||
// Custom (silent) error handler.
|
||||
}
|
||||
|
||||
OPTIONS :: xml.Options{
|
||||
flags = {
|
||||
.Ignore_Unsupported, .Intern_Comments,
|
||||
},
|
||||
expected_doctype = "",
|
||||
}
|
||||
|
||||
TEST_count := 0
|
||||
TEST_fail := 0
|
||||
|
||||
TEST :: struct {
|
||||
filename: string,
|
||||
options: xml.Options,
|
||||
expected: struct {
|
||||
error: xml.Error,
|
||||
xml_version: string,
|
||||
xml_encoding: string,
|
||||
doctype: string,
|
||||
},
|
||||
}
|
||||
|
||||
TESTS :: []TEST{
|
||||
/*
|
||||
First we test that certain files parse without error.
|
||||
*/
|
||||
{
|
||||
filename = "assets/xml/utf8.xml",
|
||||
options = OPTIONS,
|
||||
expected = {
|
||||
error = .None,
|
||||
xml_version = "1.0",
|
||||
xml_encoding = "utf-8",
|
||||
doctype = "恥ずべきフクロウ",
|
||||
},
|
||||
},
|
||||
{
|
||||
filename = "assets/xml/nl_NL-qt-ts.ts",
|
||||
options = OPTIONS,
|
||||
expected = {
|
||||
error = .None,
|
||||
xml_version = "1.0",
|
||||
xml_encoding = "utf-8",
|
||||
doctype = "TS",
|
||||
},
|
||||
},
|
||||
{
|
||||
filename = "assets/xml/nl_NL-xliff-1.0.xliff",
|
||||
options = OPTIONS,
|
||||
expected = {
|
||||
error = .None,
|
||||
xml_version = "1.0",
|
||||
xml_encoding = "UTF-8",
|
||||
doctype = "",
|
||||
},
|
||||
},
|
||||
{
|
||||
filename = "assets/xml/nl_NL-xliff-2.0.xliff",
|
||||
options = OPTIONS,
|
||||
expected = {
|
||||
error = .None,
|
||||
xml_version = "1.0",
|
||||
xml_encoding = "utf-8",
|
||||
doctype = "",
|
||||
},
|
||||
},
|
||||
|
||||
/*
|
||||
Then we test that certain errors are returned as expected.
|
||||
*/
|
||||
{
|
||||
filename = "assets/xml/utf8.xml",
|
||||
options = {
|
||||
flags = {
|
||||
.Ignore_Unsupported, .Intern_Comments,
|
||||
},
|
||||
expected_doctype = "Odin",
|
||||
},
|
||||
expected = {
|
||||
error = .Invalid_DocType,
|
||||
xml_version = "1.0",
|
||||
xml_encoding = "utf-8",
|
||||
doctype = "恥ずべきフクロウ",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
when ODIN_TEST {
|
||||
expect :: testing.expect
|
||||
log :: testing.log
|
||||
} else {
|
||||
expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
|
||||
fmt.printf("[%v] ", loc)
|
||||
TEST_count += 1
|
||||
if !condition {
|
||||
TEST_fail += 1
|
||||
fmt.println(message)
|
||||
return
|
||||
}
|
||||
fmt.println(" PASS")
|
||||
}
|
||||
log :: proc(t: ^testing.T, v: any, loc := #caller_location) {
|
||||
fmt.printf("[%v] ", loc)
|
||||
fmt.printf("log: %v\n", v)
|
||||
}
|
||||
}
|
||||
|
||||
main :: proc() {
|
||||
t := testing.T{}
|
||||
|
||||
track: mem.Tracking_Allocator
|
||||
mem.tracking_allocator_init(&track, context.allocator)
|
||||
context.allocator = mem.tracking_allocator(&track)
|
||||
|
||||
run_tests(&t)
|
||||
|
||||
if len(track.allocation_map) > 0 {
|
||||
for _, v in track.allocation_map {
|
||||
err_msg := fmt.tprintf("%v Leaked %v bytes.", v.location, v.size)
|
||||
expect(&t, false, err_msg)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
|
||||
}
|
||||
|
||||
@test
|
||||
run_tests :: proc(t: ^testing.T) {
|
||||
using fmt
|
||||
|
||||
count := 0
|
||||
|
||||
for test in TESTS {
|
||||
printf("Trying to parse %v\n\n", test.filename)
|
||||
|
||||
doc, err := xml.parse(test.filename, test.options, Silent)
|
||||
defer xml.destroy(doc)
|
||||
|
||||
err_msg := tprintf("Expected return value %v, got %v", test.expected.error, err)
|
||||
expect(t, err == test.expected.error, err_msg)
|
||||
|
||||
if len(test.expected.xml_version) > 0 {
|
||||
xml_version := ""
|
||||
for attr in doc.prolog {
|
||||
if attr.key == "version" {
|
||||
xml_version = attr.val
|
||||
}
|
||||
}
|
||||
|
||||
err_msg = tprintf("Expected XML version %v, got %v", test.expected.xml_version, xml_version)
|
||||
expect(t, xml_version == test.expected.xml_version, err_msg)
|
||||
}
|
||||
|
||||
if len(test.expected.xml_encoding) > 0 {
|
||||
xml_encoding := ""
|
||||
for attr in doc.prolog {
|
||||
if attr.key == "encoding" {
|
||||
xml_encoding = attr.val
|
||||
}
|
||||
}
|
||||
|
||||
err_msg = tprintf("Expected XML encoding %v, got %v", test.expected.xml_encoding, xml_encoding)
|
||||
expect(t, xml_encoding == test.expected.xml_encoding, err_msg)
|
||||
}
|
||||
|
||||
err_msg = tprintf("Expected DOCTYPE %v, got %v", test.expected.doctype, doc.doctype.ident)
|
||||
expect(t, doc.doctype.ident == test.expected.doctype, err_msg)
|
||||
|
||||
/*
|
||||
File-specific tests.
|
||||
*/
|
||||
switch count {
|
||||
case 0:
|
||||
expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
|
||||
attr := doc.root.attribs[0]
|
||||
|
||||
attr_key_expected := "올빼미_id"
|
||||
attr_val_expected := "Foozle <![CDATA[<greeting>Hello, world!\"</greeting>]]>Barzle"
|
||||
|
||||
attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
|
||||
expect(t, attr.key == attr_key_expected, attr_err)
|
||||
|
||||
attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
|
||||
expect(t, attr.val == attr_val_expected, attr_err)
|
||||
|
||||
expect(t, len(doc.root.children) > 0, "Expected the root tag to have children.")
|
||||
child := doc.root.children[0]
|
||||
|
||||
first_child_ident := "부끄러운:barzle"
|
||||
attr_err = tprintf("Expected first child tag's ident to be %v, got %v", first_child_ident, child.ident)
|
||||
expect(t, child.ident == first_child_ident, attr_err)
|
||||
|
||||
case 2:
|
||||
expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
|
||||
|
||||
{
|
||||
attr := doc.root.attribs[0]
|
||||
|
||||
attr_key_expected := "version"
|
||||
attr_val_expected := "1.2"
|
||||
|
||||
attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
|
||||
expect(t, attr.key == attr_key_expected, attr_err)
|
||||
|
||||
attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
|
||||
expect(t, attr.val == attr_val_expected, attr_err)
|
||||
}
|
||||
|
||||
{
|
||||
attr := doc.root.attribs[1]
|
||||
|
||||
attr_key_expected := "xmlns"
|
||||
attr_val_expected := "urn:oasis:names:tc:xliff:document:1.2"
|
||||
|
||||
attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
|
||||
expect(t, attr.key == attr_key_expected, attr_err)
|
||||
|
||||
attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
|
||||
expect(t, attr.val == attr_val_expected, attr_err)
|
||||
}
|
||||
|
||||
case 3:
|
||||
expect(t, len(doc.root.attribs) > 0, "Expected the root tag to have an attribute.")
|
||||
|
||||
{
|
||||
attr := doc.root.attribs[0]
|
||||
|
||||
attr_key_expected := "xmlns"
|
||||
attr_val_expected := "urn:oasis:names:tc:xliff:document:2.0"
|
||||
|
||||
attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
|
||||
expect(t, attr.key == attr_key_expected, attr_err)
|
||||
|
||||
attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
|
||||
expect(t, attr.val == attr_val_expected, attr_err)
|
||||
}
|
||||
|
||||
{
|
||||
attr := doc.root.attribs[1]
|
||||
|
||||
attr_key_expected := "version"
|
||||
attr_val_expected := "2.0"
|
||||
|
||||
attr_err := tprintf("Expected %v, got %v", attr_key_expected, attr.key)
|
||||
expect(t, attr.key == attr_key_expected, attr_err)
|
||||
|
||||
attr_err = tprintf("Expected %v, got %v", attr_val_expected, attr.val)
|
||||
expect(t, attr.val == attr_val_expected, attr_err)
|
||||
}
|
||||
}
|
||||
|
||||
count += 1
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user