Merge pull request #3107 from ktsiligkiris/documentation/fix_xml_docs

Fix comments for proper rendering in documentation in encoding/xml
2026-06-03 09:14:38 +00:00 · 2024-01-17 21:10:49 +01:00
parent 1b83f4a18b d54f34a162
commit ea43c030aa
5 changed files with 85 additions and 156 deletions
--- a/core/encoding/xml/debug_print.odin
+++ b/core/encoding/xml/debug_print.odin
@@ -1,3 +1,5 @@
+package xml
+
 /*
 	An XML 1.0 / 1.1 parser

@@ -9,7 +11,7 @@
 	List of contributors:
 		Jeroen van Rijn: Initial implementation.
 */
-package xml
+

 import "core:io"
 import "core:fmt"
@@ -81,4 +83,4 @@ print_element :: proc(writer: io.Writer, doc: ^Document, element_id: Element_ID,
 	}

 	return written, .None
-}
+}
--- a/core/encoding/xml/example/xml_example.odin
+++ b/core/encoding/xml/example/xml_example.odin
@@ -20,7 +20,7 @@ example :: proc() {
 		xml.destroy(docs[round])
 	}

-	DOC :: #load("../../../../tests/core/assets/XML/unicode.xml")
+	DOC :: #load("../../../../tests/core/assets/XML/utf8.xml")
 	input := DOC

 	for round in 0..<N {
@@ -109,4 +109,4 @@ main :: proc() {
 		}
 	}
 	println("Done and cleaned up!")
-}
+}
--- a/core/encoding/xml/helpers.odin
+++ b/core/encoding/xml/helpers.odin
@@ -1,3 +1,5 @@
+package xml
+
 /*
 	An XML 1.0 / 1.1 parser

@@ -6,7 +8,7 @@

 	This file contains helper functions.
 */
-package xml
+

 // Find parent's nth child with a given ident.
 find_child_by_ident :: proc(doc: ^Document, parent_id: Element_ID, ident: string, nth := 0) -> (res: Element_ID, found: bool) {
@@ -47,4 +49,4 @@ find_attribute_val_by_key :: proc(doc: ^Document, parent_id: Element_ID, key: st
 		if attr.key == key { return attr.val, true }
 	}
 	return "", false
-}
+}
--- a/core/encoding/xml/tokenizer.odin
+++ b/core/encoding/xml/tokenizer.odin
@@ -1,3 +1,5 @@
+package xml
+
 /*
 	An XML 1.0 / 1.1 parser

@@ -9,7 +11,7 @@
 	List of contributors:
 		Jeroen van Rijn: Initial implementation.
 */
-package xml
+

 import "core:fmt"
 import "core:unicode"
@@ -433,4 +435,4 @@ scan :: proc(t: ^Tokenizer) -> Token {
 		lit = string(t.src[offset : t.offset])
 	}
 	return Token{kind, lit, pos}
-}
+}
--- a/core/encoding/xml/xml_reader.odin
+++ b/core/encoding/xml/xml_reader.odin
@@ -1,28 +1,28 @@
 /*
-	An XML 1.0 / 1.1 parser
+ XML 1.0 / 1.1 parser

-	Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
-	Made available under Odin's BSD-3 license.
+ 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
+ available under Odin's BSD-3 license.

-	A from-scratch XML implementation, loosely modelled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
+ from-scratch XML implementation, loosely modelled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).

-	Features:
-		- Supports enough of the XML 1.0/1.1 spec to handle the 99.9% of XML documents in common current usage.
-		- Simple to understand and use. Small.
+Features:
+- Supports enough of the XML 1.0/1.1 spec to handle the 99.9% of XML documents in common current usage.
+- Simple to understand and use. Small.

-	Caveats:
-		- We do NOT support HTML in this package, as that may or may not be valid XML.
-		  If it works, great. If it doesn't, that's not considered a bug.
+Caveats:
+- We do NOT support HTML in this package, as that may or may not be valid XML.
+  If it works, great. If it doesn't, that's not considered a bug.

-		- We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences.
-		- <[!ELEMENT and <[!ATTLIST are not supported, and will be either ignored or return an error depending on the parser options.
+- We do NOT support UTF-16. If you have a UTF-16 XML file, please convert it to UTF-8 first. Also, our condolences.
+- <[!ELEMENT and <[!ATTLIST are not supported, and will be either ignored or return an error depending on the parser options.

-	MAYBE:
-	- XML writer?
-	- Serialize/deserialize Odin types?
+MAYBE:
+- XML writer?
+- Serialize/deserialize Odin types?

-	List of contributors:
-		Jeroen van Rijn: Initial implementation.
+List of contributors:
+- Jeroen van Rijn: Initial implementation.
 */
 package xml
 // An XML 1.0 / 1.1 parser
@@ -43,48 +43,32 @@ DEFAULT_OPTIONS :: Options{
 }

 Option_Flag :: enum {
-	/*
-		If the caller says that input may be modified, we can perform in-situ parsing.
-		If this flag isn't provided, the XML parser first duplicates the input so that it can.
-	*/
+	// If the caller says that input may be modified, we can perform in-situ parsing.
+	// If this flag isn't provided, the XML parser first duplicates the input so that it can.
 	Input_May_Be_Modified,

-	/*
-		Document MUST start with `<?xml` prologue.
-	*/
+	// Document MUST start with `<?xml` prologue.
 	Must_Have_Prolog,

-	/*
-		Document MUST have a `<!DOCTYPE`.
-	*/
+	// Document MUST have a `<!DOCTYPE`.
 	Must_Have_DocType,

-	/*
-		By default we skip comments. Use this option to intern a comment on a parented Element.
-	*/
+	// By default we skip comments. Use this option to intern a comment on a parented Element.
 	Intern_Comments,

-	/*
-		How to handle unsupported parts of the specification, like <! other than <!DOCTYPE and <![CDATA[
-	*/
+	// How to handle unsupported parts of the specification, like <! other than <!DOCTYPE and <![CDATA[
 	Error_on_Unsupported,
 	Ignore_Unsupported,

-	/*
-		By default CDATA tags are passed-through as-is.
-		This option unwraps them when encountered.
-	*/
+	// By default CDATA tags are passed-through as-is.
+	// This option unwraps them when encountered.
 	Unbox_CDATA,

-	/*
-		By default SGML entities like `&gt;`, `&#32;` and `&#x20;` are passed-through as-is.
-		This option decodes them when encountered.
-	*/
+	// By default SGML entities like `&gt;`, `&#32;` and `&#x20;` are passed-through as-is.
+	// This option decodes them when encountered.
 	Decode_SGML_Entities,

-	/*
-		If a tag body has a comment, it will be stripped unless this option is given.
-	*/
+	// If a tag body has a comment, it will be stripped unless this option is given.
 	Keep_Tag_Body_Comments,
 }
 Option_Flags :: bit_set[Option_Flag; u16]
@@ -97,28 +81,20 @@ Document :: struct {
 	encoding: Encoding,

 	doctype: struct {
-		/*
-			We only scan the <!DOCTYPE IDENT part and skip the rest.
-		*/
+		// We only scan the <!DOCTYPE IDENT part and skip the rest.
 		ident:   string,
 		rest:    string,
 	},

-	/*
-		If we encounter comments before the root node, and the option to intern comments is given, this is where they'll live.
-		Otherwise they'll be in the element tree.
-	*/
+	// If we encounter comments before the root node, and the option to intern comments is given, this is where they'll live.
+	// Otherwise they'll be in the element tree.
 	comments: [dynamic]string,

-	/*
-		Internal
-	*/
+	// Internal
 	tokenizer: ^Tokenizer,
 	allocator: mem.Allocator,

-	/*
-		Input. Either the original buffer, or a copy if `.Input_May_Be_Modified` isn't specified.
-	*/
+	// Input. Either the original buffer, or a copy if `.Input_May_Be_Modified` isn't specified.
 	input:           []u8,
 	strings_to_free: [dynamic]string,
 }
@@ -158,34 +134,24 @@ Encoding :: enum {
 	UTF_8,
 	ISO_8859_1,

-	/*
-		Aliases
-	*/
+	// Aliases
 	LATIN_1 = ISO_8859_1,
 }

 Error :: enum {
-	/*
-		General return values.
-	*/
+	// General return values.
 	None = 0,
 	General_Error,
 	Unexpected_Token,
 	Invalid_Token,

-	/*
-		Couldn't find, open or read file.
-	*/
+	// Couldn't find, open or read file.
 	File_Error,

-	/*
-		File too short.
-	*/
+	// File too short.
 	Premature_EOF,

-	/*
-		XML-specific errors.
-	*/
+	// XML-specific errors.
 	No_Prolog,
 	Invalid_Prolog,
 	Too_Many_Prologs,
@@ -194,11 +160,9 @@ Error :: enum {
 	Too_Many_DocTypes,
 	DocType_Must_Preceed_Elements,

-	/*
-		If a DOCTYPE is present _or_ the caller
-		asked for a specific DOCTYPE and the DOCTYPE
-		and root tag don't match, we return `.Invalid_DocType`.
-	*/
+	// If a DOCTYPE is present _or_ the caller
+	// asked for a specific DOCTYPE and the DOCTYPE
+	// and root tag don't match, we return `.Invalid_DocType`.
 	Invalid_DocType,

 	Invalid_Tag_Value,
@@ -211,27 +175,20 @@ Error :: enum {
 	Unsupported_Version,
 	Unsupported_Encoding,

-	/*
-		<!FOO are usually skipped.
-	*/
+	// <!FOO are usually skipped.
 	Unhandled_Bang,

 	Duplicate_Attribute,
 	Conflicting_Options,
 }

-/*
-	Implementation starts here.
-*/
 parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
 	data := data
 	context.allocator = allocator

 	opts := validate_options(options) or_return

-	/*
-		If `.Input_May_Be_Modified` is not specified, we duplicate the input so that we can modify it in-place.
-	*/
+	// If `.Input_May_Be_Modified` is not specified, we duplicate the input so that we can modify it in-place.
 	if .Input_May_Be_Modified not_in opts.flags {
 		data = bytes.clone(data)
 	}
@@ -252,10 +209,8 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 	element, parent: Element_ID
 	open: Token

-	/*
-		If a DOCTYPE is present, the root tag has to match.
-		If an expected DOCTYPE is given in options (i.e. it's non-empty), the DOCTYPE (if present) and root tag have to match.
-	*/
+	// If a DOCTYPE is present, the root tag has to match.
+	// If an expected DOCTYPE is given in options (i.e. it's non-empty), the DOCTYPE (if present) and root tag have to match.
 	expected_doctype := options.expected_doctype

 	loop: for {
@@ -263,17 +218,13 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 		// NOTE(Jeroen): This is faster as a switch.
 		switch t.ch {
 		case '<':
-			/*
-				Consume peeked `<`
-			*/
+			// Consume peeked `<`
 			advance_rune(t)

 			open = scan(t)
 			// NOTE(Jeroen): We're not using a switch because this if-else chain ordered by likelihood is 2.5% faster at -o:size and -o:speed.
 			if likely(open.kind, Token_Kind.Ident) == .Ident {
-				/*
-					e.g. <odin - Start of new element.
-				*/
+				// e.g. <odin - Start of new element.
 				element = new_element(doc)
 				if element == 0 { // First Element
 					parent = element
@@ -286,11 +237,9 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha

 				parse_attributes(doc, &doc.elements[element].attribs) or_return

-				/*
-					If a DOCTYPE is present _or_ the caller
-					asked for a specific DOCTYPE and the DOCTYPE
-					and root tag don't match, we return .Invalid_Root_Tag.
-				*/
+				// If a DOCTYPE is present _or_ the caller
+				// asked for a specific DOCTYPE and the DOCTYPE
+				// and root tag don't match, we return .Invalid_Root_Tag.
 				if element == 0 { // Root tag?
 					if len(expected_doctype) > 0 && expected_doctype != open.text {
 						error(t, t.offset, "Root Tag doesn't match DOCTYPE. Expected: %v, got: %v\n", expected_doctype, open.text)
@@ -298,23 +247,17 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 					}
 				}

-				/*
-					One of these should follow:
-					- `>`,  which means we've just opened this tag and expect a later element to close it.
-					- `/>`, which means this is an 'empty' or self-closing tag.
-				*/
+				// One of these should follow:
+				// - `>`,  which means we've just opened this tag and expect a later element to close it.
+				// - `/>`, which means this is an 'empty' or self-closing tag.
 				end_token := scan(t)
 				#partial switch end_token.kind {
 				case .Gt:
-					/*
-						We're now the new parent.
-					*/
+					// We're now the new parent.
 					parent = element

 				case .Slash:
-					/*
-						Empty tag. Close it.
-					*/
+					// Empty tag. Close it.
 					expect(t, .Gt) or_return
 					parent      = doc.elements[element].parent
 					element     = parent
@@ -325,9 +268,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 				}

 			} else if open.kind == .Slash {
-				/*
-					Close tag.
-				*/
+				// Close tag.
 				ident := expect(t, .Ident) or_return
 				_      = expect(t, .Gt)    or_return

@@ -339,9 +280,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 				element     = parent

 			} else if open.kind == .Exclaim {
-				/*
-					<!
-				*/
+				// <!
 				next := scan(t)
 				#partial switch next.kind {
 				case .Ident:
@@ -370,10 +309,8 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 					}

 				case .Dash:
-					/*
-						Comment: <!-- -->.
-						The grammar does not allow a comment to end in --->
-					*/
+					// Comment: <!-- -->.
+					// The grammar does not allow a comment to end in --->
 					expect(t, .Dash)
 					comment := scan_comment(t) or_return

@@ -395,23 +332,17 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 				}

 			} else if open.kind == .Question {
-				/*
-					<?xml
-				*/
+				// <?xml
 				next := scan(t)
 				#partial switch next.kind {
 				case .Ident:
 					if len(next.text) == 3 && strings.equal_fold(next.text, "xml") {
 						parse_prologue(doc) or_return
 					} else if len(doc.prologue) > 0 {
-						/*
-							We've already seen a prologue.
-						*/
+						// We've already seen a prologue.
 						return doc, .Too_Many_Prologs
 					} else {
-						/*
-							Could be `<?xml-stylesheet`, etc. Ignore it.
-						*/
+						// Could be `<?xml-stylesheet`, etc. Ignore it.
 						skip_element(t) or_return
 					}
 				case:
@@ -425,15 +356,11 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
 			}

 		case -1:
-			/*
-				End of file.
-			*/
+			// End of file.
 			break loop

 		case:
-			/*
-				This should be a tag's body text.
-			*/
+			// This should be a tag's body text.
 			body_text        := scan_string(t, t.offset) or_return
 			needs_processing := .Unbox_CDATA          in opts.flags
 			needs_processing |= .Decode_SGML_Entities in opts.flags
@@ -613,9 +540,7 @@ parse_prologue :: proc(doc: ^Document) -> (err: Error) {
 				doc.encoding = .LATIN_1

 			case:
-				/*
-					Unrecognized encoding, assume UTF-8.
-				*/
+				// Unrecognized encoding, assume UTF-8.
 				error(t, offset, "[parse_prologue] Warning: Unrecognized encoding: %v\n", attr.val)
 			}

@@ -658,11 +583,11 @@ skip_element :: proc(t: ^Tokenizer) -> (err: Error) {

 parse_doctype :: proc(doc: ^Document) -> (err: Error) {
 	/*
-		<!DOCTYPE greeting SYSTEM "hello.dtd">
+	<!DOCTYPE greeting SYSTEM "hello.dtd">

-		<!DOCTYPE greeting [
-			<!ELEMENT greeting (#PCDATA)>
-		]>
+	<!DOCTYPE greeting [
+		<!ELEMENT greeting (#PCDATA)>
+	]>
 	*/
 	assert(doc != nil)
 	context.allocator = doc.allocator
@@ -675,9 +600,7 @@ parse_doctype :: proc(doc: ^Document) -> (err: Error) {
 	offset := t.offset
 	skip_element(t) or_return

-	/*
-		-1 because the current offset is that of the closing tag, so the rest of the DOCTYPE tag ends just before it.
-	*/
+	// 	-1 because the current offset is that of the closing tag, so the rest of the DOCTYPE tag ends just before it.
 	doc.doctype.rest = string(t.src[offset : t.offset - 1])
 	return .None
 }
@@ -700,4 +623,4 @@ new_element :: proc(doc: ^Document) -> (id: Element_ID) {
 	cur := doc.element_count
 	doc.element_count += 1
 	return cur
-}
+}