This commit is contained in:
Jeroen van Rijn
2023-07-28 15:53:39 +02:00
parent 5ac7fe453f
commit 683ee75703
7 changed files with 98 additions and 73 deletions

View File

@@ -65,19 +65,21 @@ print_element :: proc(writer: io.Writer, doc: ^Document, element_id: Element_ID,
if element.kind == .Element {
wprintf(writer, "<%v>\n", element.ident)
if len(element.value) > 0 {
tab(writer, indent + 1)
wprintf(writer, "[Value] %v\n", element.value)
for value in element.value {
switch v in value {
case string:
tab(writer, indent + 1)
wprintf(writer, "[Value] %v\n", v)
case Element_ID:
print_element(writer, doc, v, indent + 1)
}
}
for attr in element.attribs {
tab(writer, indent + 1)
wprintf(writer, "[Attr] %v: %v\n", attr.key, attr.val)
}
for child in element.children {
print_element(writer, doc, child, indent + 1)
}
} else if element.kind == .Comment {
wprintf(writer, "[COMMENT] %v\n", element.value)
}

View File

@@ -72,10 +72,10 @@ example :: proc() {
return
}
printf("Found `<charlist>` with %v children, %v elements total\n", len(docs[0].elements[charlist].children), docs[0].element_count)
printf("Found `<charlist>` with %v children, %v elements total\n", len(docs[0].elements[charlist].value), docs[0].element_count)
crc32 := doc_hash(docs[0])
printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0xcaa042b9 else "🤬", crc32)
crc32 := doc_hash(docs[0], false)
printf("[%v] CRC32: 0x%08x\n", "🎉" if crc32 == 0x420dbac5 else "🤬", crc32)
for round in 0..<N {
defer xml.destroy(docs[round])

View File

@@ -13,20 +13,25 @@ find_child_by_ident :: proc(doc: ^Document, parent_id: Element_ID, ident: string
tag := doc.elements[parent_id]
count := 0
for child_id in tag.children {
child := doc.elements[child_id]
/*
Skip comments. They have no name.
*/
if child.kind != .Element { continue }
for v in tag.value {
switch child_id in v {
case string: continue
case Element_ID:
child := doc.elements[child_id]
/*
Skip comments. They have no name.
*/
if child.kind != .Element { continue }
/*
If the ident matches and it's the nth such child, return it.
*/
if child.ident == ident {
if count == nth { return child_id, true }
count += 1
/*
If the ident matches and it's the nth such child, return it.
*/
if child.ident == ident {
if count == nth { return child_id, true }
count += 1
}
}
}
return 0, false
}

View File

@@ -125,16 +125,19 @@ Document :: struct {
// Element is a single node of a parsed Document. Nodes are stored in
// `doc.elements` and referred to by their Element_ID index.
Element :: struct {
// Tag name; only meaningful for `.Element` nodes (comments have no name).
ident: string,
value: string,
// Content of the node, appended in parse order: each entry is either a
// piece of body text or the Element_ID of a nested node (see `Value`).
value: [dynamic]Value,
attribs: Attributes,
// Distinguishes regular tags from comment nodes; a comment node's text is
// stored in its `value`.
kind: enum {
Element = 0,
Comment,
},
// ID of the enclosing node. The first element is set as its own parent.
parent: Element_ID,
children: [dynamic]Element_ID,
}
// Value is one entry in an Element's `value` list: either a run of text
// (`string`) or a reference to a nested element/comment (`Element_ID`).
// Consumers distinguish the two with a type switch or a type assertion.
Value :: union {
string,
Element_ID,
}
Attribute :: struct {
@@ -247,9 +250,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
err = .Unexpected_Token
element, parent: Element_ID
tag_is_open := false
first_element := true
open: Token
/*
@@ -275,16 +275,10 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
e.g. <odin - Start of new element.
*/
element = new_element(doc)
tag_is_open = true
if first_element {
/*
First element.
*/
parent = element
first_element = false
if element == 0 { // First Element
parent = element
} else {
append(&doc.elements[parent].children, element)
append(&doc.elements[parent].value, element)
}
doc.elements[element].parent = parent
@@ -324,7 +318,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
expect(t, .Gt) or_return
parent = doc.elements[element].parent
element = parent
tag_is_open = false
case:
error(t, t.offset, "Expected close tag, got: %#v\n", end_token)
@@ -344,7 +337,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
}
parent = doc.elements[element].parent
element = parent
tag_is_open = false
} else if open.kind == .Exclaim {
/*
@@ -392,8 +384,8 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
el := new_element(doc)
doc.elements[el].parent = element
doc.elements[el].kind = .Comment
doc.elements[el].value = comment
append(&doc.elements[element].children, el)
append(&doc.elements[el].value, comment)
append(&doc.elements[element].value, el)
}
}
@@ -436,9 +428,6 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
/*
End of file.
*/
if tag_is_open {
return doc, .Premature_EOF
}
break loop
case:
@@ -450,7 +439,7 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
needs_processing |= .Decode_SGML_Entities in opts.flags
if !needs_processing {
doc.elements[element].value = body_text
append(&doc.elements[element].value, body_text)
continue
}
@@ -472,10 +461,10 @@ parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_ha
decoded, decode_err := entity.decode_xml(body_text, decode_opts)
if decode_err == .None {
doc.elements[element].value = decoded
append(&doc.elements[element].value, decoded)
append(&doc.strings_to_free, decoded)
} else {
doc.elements[element].value = body_text
append(&doc.elements[element].value, body_text)
}
}
}
@@ -518,7 +507,7 @@ destroy :: proc(doc: ^Document) {
for el in doc.elements {
delete(el.attribs)
delete(el.children)
delete(el.value)
}
delete(doc.elements)
@@ -710,6 +699,5 @@ new_element :: proc(doc: ^Document) -> (id: Element_ID) {
cur := doc.element_count
doc.element_count += 1
return cur
}

View File

@@ -71,6 +71,8 @@ Error :: enum {
TS_File_Expected_Source,
TS_File_Expected_Translation,
TS_File_Expected_NumerusForm,
Bad_Str,
Bad_Id,
}

View File

@@ -30,10 +30,26 @@ TS_XML_Options := xml.Options{
parse_qt_linguist_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTIONS, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
context.allocator = allocator
// get_str unwraps the `string` variant of an XML value.
//
// Returns the text and `.None` when `val` currently holds a string;
// otherwise returns an empty string and `.Bad_Str`.
get_str :: proc(val: xml.Value) -> (str: string, err: Error) {
	if text, is_text := val.(string); is_text {
		return text, .None
	}
	return "", .Bad_Str
}
// get_id unwraps the `xml.Element_ID` variant of an XML value.
//
// Returns the element ID and `.None` when `val` currently holds an ID;
// otherwise returns ID 0 and `.Bad_Id`. Callers must check `err`, because
// 0 is also a valid ID (the first element of a document).
//
// The named result was previously called `str`, copied from `get_str`;
// it is named `id` here to match what it carries. Result names are not
// visible to callers, so this does not affect any call site.
get_id :: proc(val: xml.Value) -> (id: xml.Element_ID, err: Error) {
	if child, is_id := val.(xml.Element_ID); is_id {
		return child, .None
	}
	return 0, .Bad_Id
}
ts, xml_err := xml.parse(data, TS_XML_Options)
defer xml.destroy(ts)
if xml_err != .None || ts.element_count < 1 || ts.elements[0].ident != "TS" || len(ts.elements[0].children) == 0 {
if xml_err != .None || ts.element_count < 1 || ts.elements[0].ident != "TS" || len(ts.elements[0].value) == 0 {
return nil, .TS_File_Parse_Error
}
@@ -46,10 +62,12 @@ parse_qt_linguist_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTI
section: ^Section
for child_id in ts.elements[0].children {
for value in ts.elements[0].value {
child_id := get_id(value) or_return
// These should be <context>s.
child := ts.elements[child_id]
if child.ident != "context" {
if ts.elements[child_id].ident != "context" {
return translation, .TS_File_Expected_Context
}
@@ -61,7 +79,8 @@ parse_qt_linguist_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTI
section_name, _ := strings.intern_get(&translation.intern, "")
if !options.merge_sections {
section_name, _ = strings.intern_get(&translation.intern, ts.elements[section_name_id].value)
value_text := get_str(ts.elements[section_name_id].value[0]) or_return
section_name, _ = strings.intern_get(&translation.intern, value_text)
}
if section_name not_in translation.k_v {
@@ -92,8 +111,14 @@ parse_qt_linguist_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTI
return translation, .TS_File_Expected_Translation
}
source, _ := strings.intern_get(&translation.intern, ts.elements[source_id].value)
xlat, _ := strings.intern_get(&translation.intern, ts.elements[translation_id].value)
source := get_str(ts.elements[source_id].value[0]) or_return
source, _ = strings.intern_get(&translation.intern, source)
xlat := ""
if !has_plurals {
xlat = get_str(ts.elements[translation_id].value[0]) or_return
xlat, _ = strings.intern_get(&translation.intern, xlat)
}
if source in section {
return translation, .Duplicate_Key
@@ -124,7 +149,8 @@ parse_qt_linguist_from_bytes :: proc(data: []byte, options := DEFAULT_PARSE_OPTI
if !numerus_found {
break
}
numerus, _ := strings.intern_get(&translation.intern, ts.elements[numerus_id].value)
numerus := get_str(ts.elements[numerus_id].value[0]) or_return
numerus, _ = strings.intern_get(&translation.intern, numerus)
section[source][num_plurals] = numerus
num_plurals += 1

View File

@@ -47,7 +47,7 @@ TESTS :: []TEST{
},
expected_doctype = "恥ずべきフクロウ",
},
crc32 = 0x30d82264,
crc32 = 0xe9b62f03,
},
{
@@ -62,7 +62,7 @@ TESTS :: []TEST{
},
expected_doctype = "恥ずべきフクロウ",
},
crc32 = 0xad31d8e8,
crc32 = 0x9c2643ed,
},
{
@@ -77,7 +77,7 @@ TESTS :: []TEST{
},
expected_doctype = "TS",
},
crc32 = 0x7bce2630,
crc32 = 0x859b7443,
},
{
@@ -92,7 +92,7 @@ TESTS :: []TEST{
},
expected_doctype = "xliff",
},
crc32 = 0x43f19d61,
crc32 = 0x3deaf329,
},
{
@@ -107,7 +107,7 @@ TESTS :: []TEST{
},
expected_doctype = "xliff",
},
crc32 = 0x961e7635,
crc32 = 0x0c55e287,
},
{
@@ -118,7 +118,7 @@ TESTS :: []TEST{
},
expected_doctype = "html",
},
crc32 = 0x573c1033,
crc32 = 0x05373317,
},
{
@@ -129,7 +129,7 @@ TESTS :: []TEST{
},
expected_doctype = "html",
},
crc32 = 0x82588917,
crc32 = 0x3b6d4a90,
},
{
@@ -140,7 +140,7 @@ TESTS :: []TEST{
},
expected_doctype = "html",
},
crc32 = 0x5e74d8a6,
crc32 = 0x5be2ffdc,
},
/*
@@ -170,7 +170,7 @@ TESTS :: []TEST{
expected_doctype = "",
},
err = .None,
crc32 = 0xcaa042b9,
crc32 = 0x420dbac5,
},
}
@@ -260,19 +260,21 @@ doc_to_string :: proc(doc: ^xml.Document) -> (result: string) {
if element.kind == .Element {
wprintf(writer, "<%v>\n", element.ident)
if len(element.value) > 0 {
tab(writer, indent + 1)
wprintf(writer, "[Value] %v\n", element.value)
for value in element.value {
switch v in value {
case string:
tab(writer, indent + 1)
wprintf(writer, "[Value] %v\n", v)
case xml.Element_ID:
print_element(writer, doc, v, indent + 1)
}
}
for attr in element.attribs {
tab(writer, indent + 1)
wprintf(writer, "[Attr] %v: %v\n", attr.key, attr.val)
}
for child in element.children {
print_element(writer, doc, child, indent + 1)
}
} else if element.kind == .Comment {
wprintf(writer, "[COMMENT] %v\n", element.value)
}