Update XML reader to normalize whitespace, part 1.

This commit is contained in:
Jeroen van Rijn
2024-06-12 12:52:48 +02:00
parent e87c5bca58
commit ebadff555d
4 changed files with 70 additions and 116 deletions

View File

@@ -203,9 +203,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
doc.elements = make([dynamic]Element, 1024, 1024, allocator)
// strings.intern_init(&doc.intern, allocator, allocator)
err = .Unexpected_Token
err = .Unexpected_Token
element, parent: Element_ID
open: Token
@@ -259,8 +257,8 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
case .Slash:
// Empty tag. Close it.
expect(t, .Gt) or_return
parent = doc.elements[element].parent
element = parent
parent = doc.elements[element].parent
element = parent
case:
error(t, t.offset, "Expected close tag, got: %#v\n", end_token)
@@ -276,8 +274,8 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
error(t, t.offset, "Mismatched Closing Tag. Expected %v, got %v\n", doc.elements[element].ident, ident.text)
return doc, .Mismatched_Closing_Tag
}
parent = doc.elements[element].parent
element = parent
parent = doc.elements[element].parent
element = parent
} else if open.kind == .Exclaim {
// <!
@@ -463,8 +461,8 @@ validate_options :: proc(options: Options) -> (validated: Options, err: Error) {
return validated, .None
}
expect :: proc(t: ^Tokenizer, kind: Token_Kind) -> (tok: Token, err: Error) {
tok = scan(t)
expect :: proc(t: ^Tokenizer, kind: Token_Kind, multiline_string := false) -> (tok: Token, err: Error) {
tok = scan(t, multiline_string=multiline_string)
if tok.kind == kind { return tok, .None }
error(t, t.offset, "Expected \"%v\", got \"%v\".", kind, tok.kind)
@@ -480,7 +478,13 @@ parse_attribute :: proc(doc: ^Document) -> (attr: Attribute, offset: int, err: E
offset = t.offset - len(key.text)
_ = expect(t, .Eq) or_return
value := expect(t, .String) or_return
value := expect(t, .String, multiline_string=true) or_return
normalized, normalize_err := entity.decode_xml(value.text, {.Normalize_Whitespace}, doc.allocator)
if normalize_err == .None {
append(&doc.strings_to_free, normalized)
value.text = normalized
}
attr.key = key.text
attr.val = value.text