[i18n] Initial i18n support.

- Add initial GetText .MO parser
- Add translation struct and helpers
- Pluralized lookup

 TODO:
- Support for more translation catalog file formats.
This commit is contained in:
Jeroen van Rijn
2022-04-28 18:58:49 +02:00
parent e53ba3b116
commit 2fae6eda23
6 changed files with 406 additions and 0 deletions

View File

@@ -0,0 +1,64 @@
package i18n_example
import "core:mem"
import "core:fmt"
import "core:i18n"
/*
Short alias for `i18n.get`, so translatable strings in this example read as `LOC("...")`.
*/
LOC :: i18n.get
_main :: proc() {
	SEPARATOR :: "-----"

	/*
		Parse the embedded MO file and install it as the active translation,
		which lets every `LOC` call below leave out the "catalog" parameter.
	*/
	err: i18n.Error
	i18n.ACTIVE, err = i18n.parse_mo(#load("nl_NL.mo"))
	defer i18n.destroy()

	if err != .None {
		return
	}

	/*
		Keys that are present in the .MO catalog.
		The empty key holds the catalog's header entry.
	*/
	fmt.println(SEPARATOR)
	fmt.println(LOC(""))

	fmt.println(SEPARATOR)
	fmt.println(LOC("There are 69,105 leaves here."))

	fmt.println(SEPARATOR)
	fmt.println(LOC("Hellope, World!"))

	/*
		Pluralized lookup: either the singular or the plural form may serve as the key for the same translation.
	*/
	fmt.println(SEPARATOR)
	fmt.printf(LOC("There is %d leaf.\n", 1), 1)
	fmt.printf(LOC("There is %d leaf.\n", 42), 42)

	fmt.printf(LOC("There are %d leaves.\n", 1), 1)
	fmt.printf(LOC("There are %d leaves.\n", 42), 42)

	/*
		A key that is not in the catalog; the lookup hands the key itself back.
	*/
	fmt.println(SEPARATOR)
	fmt.println(LOC("Come visit us on Discord!"))
}
/*
	Entry point: runs the example under a tracking allocator and reports
	every allocation that is still live once `_main` has returned.
*/
main :: proc() {
	track: mem.Tracking_Allocator
	mem.tracking_allocator_init(&track, context.allocator)
	/*
		Release the tracker's own bookkeeping on exit, so the leak checker doesn't leak itself.
	*/
	defer mem.tracking_allocator_destroy(&track)

	context.allocator = mem.tracking_allocator(&track)

	_main()

	if len(track.allocation_map) > 0 {
		fmt.println()
		for _, v in track.allocation_map {
			fmt.printf("%v Leaked %v bytes.\n", v.location, v.size)
		}
	}
}

View File

@@ -0,0 +1,30 @@
# Odin i18n Example
# Copyright (C) 2021 Jeroen van Rijn
# This file is distributed under the same license as the PACKAGE package.
# Jeroen van Rijn <Kelimion@users.noreply.github.com>, 2021.
#
#, fuzzy
msgid ""
msgstr "Project-Id-Version: Example 0.0.1\n"
"Report-Msgid-Bugs-To: Jeroen van Rijn <Kelimion@users.noreply.github.com>\n"
"POT-Creation-Date: 2021-11-27 19:23+0100\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: en-GB\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
#: i18n_example.odin:28
msgid "There are 69,105 leaves here."
msgstr "Er zijn hier 69.105 bladeren."
#: i18n_example.odin:30
msgid "Hellope, World!"
msgstr "Hallo, Wereld!"
#: i18n_example.odin:36
msgid "There is %d leaf.\n"
msgid_plural "There are %d leaves.\n"
msgstr[0] "Er is %d blad.\n"
msgstr[1] "Er zijn %d bladeren.\n"

BIN
core/i18n/example/nl_NL.mo Normal file

Binary file not shown.

View File

@@ -0,0 +1,33 @@
# Odin i18n Example
# Copyright (C) 2021 Jeroen van Rijn
# This file is distributed under the same license as the PACKAGE package.
# Jeroen van Rijn <Kelimion@users.noreply.github.com>, 2021.
#
msgid ""
msgstr ""
"Project-Id-Version: Example 0.0.1\n"
"Report-Msgid-Bugs-To: Jeroen van Rijn <Kelimion@users.noreply.github.com>\n"
"POT-Creation-Date: 2021-11-27 19:23+0100\n"
"PO-Revision-Date: 2021-11-28 02:56+0100\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Language-Team: Odin Language Team\n"
"X-Generator: Poedit 3.0\n"
"Last-Translator: Jeroen van Rijn\n"
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
"Language: nl_NL\n"
#: i18n_example.odin:28
msgid "There are 69,105 leaves here."
msgstr "Er zijn hier 69.105 bladeren."
#: i18n_example.odin:30
msgid "Hellope, World!"
msgstr "Hallo, Wereld!"
#: i18n_example.odin:36
msgid "There is %d leaf.\n"
msgid_plural "There are %d leaves.\n"
msgstr[0] "Er is %d blad.\n"
msgstr[1] "Er zijn %d bladeren.\n"

163
core/i18n/gettext.odin Normal file
View File

@@ -0,0 +1,163 @@
package i18n
/*
A parser for GNU GetText .MO files.
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
A from-scratch implementation based on the specification found here:
https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
List of contributors:
Jeroen van Rijn: Initial implementation.
*/
import "core:os"
import "core:strings"
import "core:bytes"
/*
	Parses a GNU GetText .MO catalog from an in-memory byte slice.

	Inputs:
	- data:       The raw contents of the .MO file, in either byte order.
	- pluralizer: Optional procedure mapping a number to a plural slot; stored on the Translation as-is.
	- allocator:  Used for the Translation, its string interner and its map.

	Returns:
	- translation: The parsed catalog. It is `nil` when the header is rejected. When an error occurs
	               after the header has been accepted, the partially filled catalog is returned
	               alongside the error, and the caller remains responsible for destroying it.
	- err:         `.None` on success.
*/
parse_mo_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
	context.allocator = allocator

	/*
		An MO file should have at least a 4-byte magic, a 4-byte revision,
		a 4-byte number of strings value, and 2 x 4-byte offsets.
	*/
	if len(data) < 20 {
		return nil, .MO_File_Invalid
	}

	/*
		Check magic. It reads as 0x950412de in the byte order the file was written in,
		so try native order first and fall back to the swapped order.
	*/
	native := true
	magic  := read_u32(data, native) or_return

	if magic != 0x950412de {
		native = false
		magic  = read_u32(data, native) or_return

		if magic != 0x950412de { return nil, .MO_File_Invalid_Signature }
	}

	/*
		The revision is a single 32-bit word in the file's byte order:
		major version in the upper 16 bits, minor version in the lower 16 bits.
		Only the major version decides whether we can read the file.
	*/
	revision      := read_u32(data[4:], native) or_return
	version_major := revision >> 16
	if version_major > 1 { return nil, .MO_File_Unsupported_Version }

	/*
		Every header field uses the byte order detected above.
	*/
	count             := read_u32(data[ 8:], native) or_return
	original_offset   := read_u32(data[12:], native) or_return
	translated_offset := read_u32(data[16:], native) or_return

	if count == 0 { return nil, .Empty_Translation_Catalog }

	/*
		Initialize Translation, interner and optional pluralizer.
	*/
	translation = new(Translation)
	translation.pluralize = pluralizer
	strings.intern_init(&translation.intern, allocator, allocator)

	/*
		All range checks are done in 64 bits, so offsets and lengths taken
		from the file cannot wrap around and slip past a check.
	*/
	size := u64(len(data))

	for n := u32(0); n < count; n += 1 {
		entry := 8 * u64(n)

		/*
			Grab the original string's length and offset.
		*/
		o_entry := u64(original_offset) + entry
		if o_entry + 8 > size { return translation, .MO_File_Invalid }

		o_length := read_u32(data[int(o_entry):],     native) or_return
		o_offset := read_u32(data[int(o_entry) + 4:], native) or_return

		/*
			Grab the translated string's length and offset.
		*/
		t_entry := u64(translated_offset) + entry
		if t_entry + 8 > size { return translation, .MO_File_Invalid }

		t_length := read_u32(data[int(t_entry):],     native) or_return
		t_offset := read_u32(data[int(t_entry) + 4:], native) or_return

		/*
			Both strings, including their NUL terminator, have to lie within the file.
		*/
		o_end := u64(o_offset) + u64(o_length) + 1
		t_end := u64(t_offset) + u64(t_length) + 1
		if max(o_end, t_end) > size { return translation, .Premature_EOF }

		key := data[int(o_offset):][:int(o_length)]
		val := data[int(t_offset):][:int(t_length)]

		/*
			Could be a pluralized string, in which case the forms are separated by NUL bytes.
			The temporary slices are released at the end of each iteration, also on early return.
		*/
		zero := []byte{0}

		keys := bytes.split(key, zero)
		defer delete(keys)

		vals := bytes.split(val, zero)
		defer delete(vals)

		/*
			NOTE(review): requiring as many value forms as key forms rejects catalogs whose
			language has a plural count other than 2 (singular + plural key, N values) - confirm
			whether that is intended.
		*/
		if len(keys) != len(vals) || len(vals) > MAX_PLURALS {
			return translation, .MO_File_Incorrect_Plural_Count
		}

		/*
			Intern the value forms once, then pad the unused slots with the last form
			so that any slot index yields a usable string.
		*/
		interned_vals: [MAX_PLURALS]string
		last_val: string

		for v, i in vals {
			interned_vals[i] = strings.intern_get(&translation.intern, string(v))
			last_val = interned_vals[i]
		}
		for i := len(vals); i < MAX_PLURALS; i += 1 {
			interned_vals[i] = last_val
		}

		/*
			Every key form (singular and plural) points at the same set of values.
		*/
		for k in keys {
			interned_key := strings.intern_get(&translation.intern, string(k))
			translation.k_v[interned_key] = interned_vals
		}
	}
	return
}
/*
	Reads the file called `filename` in its entirety and parses it as a GNU GetText .MO catalog.

	The file contents are released again before returning.
	Returns `.File_Error` when the file could not be read; otherwise whatever `parse_mo_from_slice` returns.
*/
parse_mo_file :: proc(filename: string, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
	context.allocator = allocator

	contents, read_ok := os.read_entire_file(filename)
	defer delete(contents)

	if read_ok {
		return parse_mo_from_slice(contents, pluralizer)
	}
	return {}, .File_Error
}
/*
	Parse an MO catalog from either a filename or a byte slice.
*/
parse_mo :: proc {
	parse_mo_file,
	parse_mo_from_slice,
}
/*
Helpers.
*/
/*
	Reads a `u32` from the first 4 bytes of `data`.

	Inputs:
	- data:          The bytes to read from; may start at any alignment.
	- native_endian: When `true` the bytes are interpreted in the host's byte order,
	                 when `false` in the opposite byte order.

	Returns `.Premature_EOF` if `data` holds fewer than 4 bytes.
*/
read_u32 :: proc(data: []u8, native_endian := true) -> (res: u32, err: Error) {
	if len(data) < size_of(u32) { return 0, .Premature_EOF }

	/*
		Copy the bytes out as a byte array rather than dereferencing a `^u32`:
		`data` is not guaranteed to be 4-byte aligned.
	*/
	raw := (^[4]u8)(raw_data(data))^
	val := transmute(u32)raw

	if native_endian {
		return val, .None
	} else {
		when ODIN_ENDIAN == .Little {
			return u32(transmute(u32be)val), .None
		} else {
			return u32(transmute(u32le)val), .None
		}
	}
}
/*
	Reads a `u16` from the first 2 bytes of `data`.

	Inputs:
	- data:          The bytes to read from; may start at any alignment.
	- native_endian: When `true` the bytes are interpreted in the host's byte order,
	                 when `false` in the opposite byte order.

	Returns `.Premature_EOF` if `data` holds fewer than 2 bytes.
*/
read_u16 :: proc(data: []u8, native_endian := true) -> (res: u16, err: Error) {
	if len(data) < size_of(u16) { return 0, .Premature_EOF }

	/*
		Copy the bytes out as a byte array rather than dereferencing a `^u16`:
		`data` is not guaranteed to be 2-byte aligned.
	*/
	raw := (^[2]u8)(raw_data(data))^
	val := transmute(u16)raw

	if native_endian {
		return val, .None
	} else {
		when ODIN_ENDIAN == .Little {
			return u16(transmute(u16be)val), .None
		} else {
			return u16(transmute(u16le)val), .None
		}
	}
}

116
core/i18n/i18n.odin Normal file
View File

@@ -0,0 +1,116 @@
package i18n
/*
Internationalization helpers.
Copyright 2021 Jeroen van Rijn <nom@duclavier.com>.
Made available under Odin's BSD-3 license.
List of contributors:
Jeroen van Rijn: Initial implementation.
*/
import "core:strings"
/*
TODO:
- Support for more translation catalog file formats.
*/
/*
Number of plural slots stored for every key in a Translation.
The MO parser rejects entries that carry more forms than this.
*/
MAX_PLURALS :: 10
/*
Currently active catalog.
Used as the default `catalog` argument of `get`, `get_by_slot` and `destroy`.
It is `nil` until the caller assigns a parsed Translation to it.
*/
ACTIVE: ^Translation
/*
The main data structure. It can be generated from various file formats, as long as we have a parser for them.
*/
Translation :: struct {
/*
Maps each key to its MAX_PLURALS plural slots.
*/
k_v: map[string][MAX_PLURALS]string,
/*
String interner through which the parser obtains the keys and values stored in `k_v`.
*/
intern: strings.Intern,
/*
Optional rule that maps a number to a plural slot. When `nil`, `get` uses its built-in default rule.
*/
pluralize: proc(number: int) -> int,
}
/*
Errors returned by the catalog parsers and their helpers.
*/
Error :: enum {
/*
General return values.
*/
None = 0,
/*
The catalog header reports zero strings.
*/
Empty_Translation_Catalog,
/*
Couldn't find, open or read file.
*/
File_Error,
/*
File too short: a read or a string would extend past the end of the data.
*/
Premature_EOF,
/*
GNU Gettext *.MO file errors.
*/
/*
The magic number doesn't match in either byte order.
*/
MO_File_Invalid_Signature,
/*
The major version is greater than 1.
*/
MO_File_Unsupported_Version,
/*
The data is shorter than the fixed header, or a string table entry lies beyond its end.
*/
MO_File_Invalid,
/*
An entry's number of key forms differs from its number of value forms, or exceeds MAX_PLURALS.
*/
MO_File_Incorrect_Plural_Count,
}
/*
	Looks up `key` and returns the translation in the plural slot that matches `number`.
	Returns `key` itself if there is no catalog or the key is unknown.

	Several ways to use:
	- get(key), which uses `number = 0` and the i18n.ACTIVE catalog, or
	- get(key, number), which returns the appropriate plural from the active catalog, or
	- get(key, number, catalog) to grab text from a specific one.

	The slot is chosen by the catalog's `pluralize` procedure if it has one.
	Otherwise the default rule applies: slot 0 (singular) for exactly 1 item, slot 1 (plural) for anything else.
	Note that under the default rule the default `number = 0` therefore selects slot 1;
	pass `number = 1` to ask for the singular form of a pluralized key.
*/
get :: proc(key: string, number := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
	/*
		Without a catalog there is no pluralizer to consult and nothing to look up.
		Return the key, same as `get_by_slot` does.
	*/
	if catalog == nil {
		return key
	}

	/*
		A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule.
	*/
	plural := 1 if number != 1 else 0

	if catalog.pluralize != nil {
		plural = catalog.pluralize(number)
	}
	return get_by_slot(key, plural, catalog)
}
/*
	Looks up `key` and returns the translation stored in plural slot `slot`.

	Several ways to use:
	- get_by_slot(key), which defaults to slot 0 (the singular form) and the i18n.ACTIVE catalog, or
	- get_by_slot(key, slot), which returns the requested plural from the active catalog, or
	- get_by_slot(key, slot, catalog) to grab text from a specific one.

	If a file format parser doesn't (yet) support plural slots, each of the slots will point at the same string.
*/
get_by_slot :: proc(key: string, slot := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
	/*
		Return the key if the catalog hasn't been initialized yet.
	*/
	if catalog == nil {
		return key
	}

	/*
		An unknown key is returned untranslated.
	*/
	translations, found := catalog.k_v[key]
	if !found {
		return key
	}

	/*
		Out-of-range slots are clamped to the nearest valid one.
	*/
	return translations[clamp(slot, 0, MAX_PLURALS - 1)]
}
/*
	Releases a catalog: its interned strings, its key/value map and the Translation itself.

	- destroy(), to clean up the currently active catalog i18n.ACTIVE
	- destroy(catalog), to clean up a specific catalog.

	Passing `nil` is a no-op. If the destroyed catalog is the active one, i18n.ACTIVE is reset to `nil`
	so that later lookups fall back to returning their key instead of reading freed memory.
*/
destroy :: proc(catalog: ^Translation = ACTIVE) {
	if catalog == nil {
		return
	}

	/*
		Don't leave the global pointing at memory we're about to free.
	*/
	if catalog == ACTIVE {
		ACTIVE = nil
	}

	strings.intern_destroy(&catalog.intern)
	delete(catalog.k_v)
	free(catalog)
}