[i18n] QT Linguist TS reader.

This commit is contained in:
Jeroen van Rijn
2022-04-29 00:29:55 +02:00
parent ba23bfb7b9
commit 1289c96e2c
6 changed files with 243 additions and 42 deletions

View File

@@ -87,7 +87,6 @@ Option_Flag :: enum {
If a tag body has a comment, it will be stripped unless this option is given.
*/
Keep_Tag_Body_Comments,
}
Option_Flags :: bit_set[Option_Flag; u16]

View File

@@ -4,9 +4,9 @@ import "core:mem"
import "core:fmt"
import "core:i18n"
LOC :: i18n.get
_T :: i18n.get
_main :: proc() {
mo :: proc() {
using fmt
err: i18n.Error
@@ -23,27 +23,60 @@ _main :: proc() {
These are in the .MO catalog.
*/
println("-----")
println(LOC(""))
println(_T(""))
println("-----")
println(LOC("There are 69,105 leaves here."))
println(_T("There are 69,105 leaves here."))
println("-----")
println(LOC("Hellope, World!"))
println(_T("Hellope, World!"))
/*
For ease of use, pluralized lookup can use both singular and plural form as key for the same translation.
*/
println("-----")
printf(LOC("There is %d leaf.\n", 1), 1)
printf(LOC("There is %d leaf.\n", 42), 42)
printf(_T("There is %d leaf.\n", 1), 1)
printf(_T("There is %d leaf.\n", 42), 42)
printf(LOC("There are %d leaves.\n", 1), 1)
printf(LOC("There are %d leaves.\n", 42), 42)
printf(_T("There are %d leaves.\n", 1), 1)
printf(_T("There are %d leaves.\n", 42), 42)
/*
This isn't.
*/
println("-----")
println(LOC("Come visit us on Discord!"))
println(_T("Come visit us on Discord!"))
}
/*
	Example: parse the embedded `nl_NL-qt-ts.ts` Qt Linguist catalog, install it as the
	active translation and print a few lookups from each of its sections.
*/
qt :: proc() {
	err: i18n.Error

	// With the parsed catalog installed as `i18n.ACTIVE`, `_T` needs no explicit "catalog" argument.
	i18n.ACTIVE, err = i18n.parse_qt(#load("../../../tests/core/assets/XML/nl_NL-qt-ts.ts"))
	defer i18n.destroy()

	fmt.printf("parse_qt returned %v\n", err)
	if err != .None {
		return
	}

	// Section "Page": plain (section, key) lookups for keys that are in the .TS catalog.
	fmt.println("--- Page section ---")
	fmt.println("Page:Text for translation =", _T("Page", "Text for translation"))
	fmt.println("-----")
	fmt.println("Page:Also text to translate =", _T("Page", "Also text to translate"))
	fmt.println("-----")

	// Section "installscript".
	fmt.println("--- installscript section ---")
	fmt.println("installscript:99 bottles of beer on the wall =", _T("installscript", "99 bottles of beer on the wall"))
	fmt.println("-----")

	// Section "apple_count": the third argument is the number passed on to the lookup.
	fmt.println("--- apple_count section ---")
	fmt.println("apple_count:%d apple(s) =")
	fmt.println("\t 1 =", _T("apple_count", "%d apple(s)", 1))
	fmt.println("\t 42 =", _T("apple_count", "%d apple(s)", 42))
}
main :: proc() {
@@ -53,7 +86,8 @@ main :: proc() {
mem.tracking_allocator_init(&track, context.allocator)
context.allocator = mem.tracking_allocator(&track)
_main()
// mo()
qt()
if len(track.allocation_map) > 0 {
println()

View File

@@ -2,7 +2,7 @@ package i18n
/*
A parser for GNU GetText .MO files.
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
A from-scratch implementation based on the specification found here:

View File

@@ -2,7 +2,7 @@ package i18n
/*
Internationalization helpers.
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
List of contributors:
@@ -26,8 +26,11 @@ MAX_PLURALS :: min(max(#config(ODIN_i18N_MAX_PLURAL_FORMS, 10), 1), 255)
/*
The main data structure. This can be generated from various different file formats, as long as we have a parser for them.
*/
Section :: map[string][]string
Translation :: struct {
k_v: map[string]map[string][]string,
k_v: map[string]Section, // k_v[section][key][plural_form] = ...
intern: strings.Intern,
pluralize: proc(number: int) -> int,
@@ -39,6 +42,7 @@ Error :: enum {
*/
None = 0,
Empty_Translation_Catalog,
Duplicate_Key,
/*
Couldn't find, open or read file.
@@ -57,6 +61,17 @@ Error :: enum {
MO_File_Unsupported_Version,
MO_File_Invalid,
MO_File_Incorrect_Plural_Count,
/*
Qt Linguist *.TS file errors.
*/
TS_File_Parse_Error,
TS_File_Expected_Context,
TS_File_Expected_Context_Name,
TS_File_Expected_Source,
TS_File_Expected_Translation,
TS_File_Expected_NumerusForm,
}
/*
@@ -92,7 +107,7 @@ get_by_section :: proc(section, key: string, number := 0, catalog: ^Translation
if catalog.pluralize != nil {
plural = catalog.pluralize(number)
}
return get_by_slot(key, plural, catalog)
return get_by_slot(section, key, plural, catalog)
}
get :: proc{get_single_section, get_by_section}

153
core/i18n/qt_linguist.odin Normal file
View File

@@ -0,0 +1,153 @@
package i18n
/*
A parser for Qt Linguist TS files.
Copyright 2022 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
A from-scratch implementation based on the specification found here:
https://doc.qt.io/qt-5/linguist-ts-file-format.html
List of contributors:
Jeroen van Rijn: Initial implementation.
*/
import "core:os"
import "core:encoding/xml"
import "core:strings"
/*
	Options handed to `xml.parse` when reading Qt Linguist TS documents.

	`expected_doctype` is set to "TS"; the exact meaning of each flag is defined by `core:encoding/xml`.

	NOTE(review): `.Input_May_Be_Modified` presumably allows the XML parser to alter the caller's
	buffer in place - confirm this is acceptable for read-only inputs such as `#load`ed data.
*/
TS_XML_Options := xml.Options{
flags = {
.Input_May_Be_Modified,
.Must_Have_Prolog,
.Must_Have_DocType,
.Ignore_Unsupported,
.Unbox_CDATA,
.Decode_SGML_Entities,
},
expected_doctype = "TS",
}
/*
	Parses a Qt Linguist TS document held in memory into a `Translation` catalog.

	Layout of the result: `translation.k_v[section][key][plural_form]`, where `section` is the
	`<name>` of a `<context>`, `key` is a message's `<source>` text and the forms are taken from
	`<translation>` (one entry) or its `<numerusform>` children (for `numerus="yes"` messages).

	Inputs:
	- data:       The raw bytes of the TS document.
	- pluralizer: Optional procedure stored in `translation.pluralize`.
	- allocator:  Used for the catalog, its string interner and the temporary XML document.

	Returns:
	- translation: `nil` if the XML could not be parsed or has no `<TS>` root with children.
	               For every later error the partially filled catalog is returned together
	               with the error, so the caller remains responsible for destroying it.
	- err:         `.None` on success, otherwise `.TS_File_*` or `.Duplicate_Key`.
*/
parse_qt_linguist_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
	context.allocator = allocator

	ts, xml_err := xml.parse(data, TS_XML_Options)
	defer xml.destroy(ts)

	if xml_err != .None || ts.element_count < 1 || ts.elements[0].ident != "TS" || len(ts.elements[0].children) == 0 {
		return nil, .TS_File_Parse_Error
	}

	/*
		Initialize Translation, interner and optional pluralizer.
	*/
	translation = new(Translation)
	translation.pluralize = pluralizer
	strings.intern_init(&translation.intern, allocator, allocator)

	section: ^Section

	for child_id in ts.elements[0].children {
		// These should be <context>s.
		child := ts.elements[child_id]
		if child.ident != "context" {
			return translation, .TS_File_Expected_Context
		}

		// Find section name.
		section_name_id, section_name_found := xml.find_child_by_ident(ts, child_id, "name")
		if !section_name_found {
			return translation, .TS_File_Expected_Context_Name
		}

		/*
			The XML document is destroyed when this procedure returns (and callers may free `data`),
			so strings that outlive the parse must not point into it.
			Intern the section name so the map key is owned by the catalog.
		*/
		section_name := strings.intern_get(&translation.intern, ts.elements[section_name_id].value)

		if section_name not_in translation.k_v {
			translation.k_v[section_name] = {}
		}
		section = &translation.k_v[section_name]

		// Find messages in section.
		nth: int
		for {
			message_id, message_found := xml.find_child_by_ident(ts, child_id, "message", nth)
			if !message_found {
				break
			}

			numerus_tag, _ := xml.find_attribute_val_by_key(ts, message_id, "numerus")
			has_plurals := numerus_tag == "yes"

			// We must have a <source> = key
			source_id, source_found := xml.find_child_by_ident(ts, message_id, "source")
			if !source_found {
				return translation, .TS_File_Expected_Source
			}

			// We must have a <translation>
			translation_id, translation_found := xml.find_child_by_ident(ts, message_id, "translation")
			if !translation_found {
				return translation, .TS_File_Expected_Translation
			}

			source := ts.elements[source_id]
			xlat   := ts.elements[translation_id]

			if source.value in section {
				return translation, .Duplicate_Key
			}

			// Same lifetime concern as the section name: the key has to be owned by the catalog.
			key := strings.intern_get(&translation.intern, source.value)

			if has_plurals {
				// A numerus message carries its text in <numerusform> children, not in <translation> itself.
				if xlat.value != "" {
					return translation, .TS_File_Expected_NumerusForm
				}

				// First pass: count the <numerusform> children so the slice can be sized exactly.
				num_plurals: int
				for {
					_, numerus_found := xml.find_child_by_ident(ts, translation_id, "numerusform", num_plurals)
					if !numerus_found {
						break
					}
					num_plurals += 1
				}

				if num_plurals < 2 {
					return translation, .TS_File_Expected_NumerusForm
				}

				// Second pass: intern each plural form, then store the finished slice once.
				forms := make([]string, num_plurals)
				for form_index in 0..<num_plurals {
					numerus_id, numerus_found := xml.find_child_by_ident(ts, translation_id, "numerusform", form_index)
					if !numerus_found {
						break
					}
					forms[form_index] = strings.intern_get(&translation.intern, ts.elements[numerus_id].value)
				}
				section[key] = forms
			} else {
				// Single translation
				forms := make([]string, 1)
				forms[0] = strings.intern_get(&translation.intern, xlat.value)
				section[key] = forms
			}

			nth += 1
		}
	}
	return
}
/*
	Reads the whole file `filename` and parses it as a Qt Linguist TS document
	by handing its contents to `parse_qt_linguist_from_slice`.

	Inputs:
	- filename:   Path of the TS file to read.
	- pluralizer: Optional procedure forwarded to the slice parser.
	- allocator:  Installed as `context.allocator` for the read and the parse.

	Returns:
	- translation: `nil` when the file could not be read, otherwise whatever the slice parser returns.
	- err:         `.File_Error` when the file could not be read, otherwise the slice parser's error.
*/
parse_qt_linguist_file :: proc(filename: string, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
	context.allocator = allocator

	contents, read_ok := os.read_entire_file(filename)
	// The file buffer is only needed for the duration of the parse.
	defer delete(contents)

	if !read_ok {
		return nil, .File_Error
	}

	return parse_qt_linguist_from_slice(contents, pluralizer)
}
/*
	Overload set for parsing Qt Linguist TS catalogs:
	pass a `string` file name or a `[]u8` slice holding the document.
*/
parse_qt :: proc {
	parse_qt_linguist_file,
	parse_qt_linguist_from_slice,
}

View File

@@ -2,34 +2,34 @@
<!DOCTYPE TS>
<TS version="2.1" language="nl" sourcelanguage="en">
<context>
<name>Page</name>
<message>
<source>Text for translation</source>
<comment>commenting</comment>
<translation type="obsolete">Tekst om te vertalen</translation>
</message>
<message>
<source>Also text to translate</source>
<extracomment>some text</extracomment>
<translation>Ook tekst om te vertalen</translation>
</message>
<name>Page</name>
<message>
<source>Text for translation</source>
<comment>commenting</comment>
<translation type="obsolete">Tekst om te vertalen</translation>
</message>
<message>
<source>Also text to translate</source>
<extracomment>some text</extracomment>
<translation>Ook tekst om te vertalen</translation>
</message>
</context>
<context>
<name>installscript</name>
<message>
<source>99 bottles of beer on the wall</source>
<oldcomment>some new comments here</oldcomment>
<translation>99 flessen bier op de muur</translation>
</message>
<name>installscript</name>
<message>
<source>99 bottles of beer on the wall</source>
<oldcomment>some new comments here</oldcomment>
<translation>99 flessen bier op de muur</translation>
</message>
</context>
<context>
<name>apple_count</name>
<message numerus="yes">
<source>%d apple(s)</source>
<translation>
<numerusform>%d appel</numerusform>
<numerusform>%d appels</numerusform>
</translation>
</message>
</context>
<name>apple_count</name>
<message numerus="yes">
<source>%d apple(s)</source>
<translation>
<numerusform>%d appel</numerusform>
<numerusform>%d appels</numerusform>
</translation>
</message>
</context>
</TS>