From 2fae6eda2321881ccf8d942e2c27e6a7c29aebfd Mon Sep 17 00:00:00 2001
From: Jeroen van Rijn
Date: Thu, 28 Apr 2022 18:58:49 +0200
Subject: [PATCH] [i18n] Initial i18n support.

- Add initial GetText .MO parser
- Add translation struct and helpers
- Pluralized lookup

TODO:
- Support for more translation catalog file formats.
---
 core/i18n/example/i18n_example.odin | 64 +++++++++++
 core/i18n/example/messages.pot | 30 +++++
 core/i18n/example/nl_NL.mo | Bin 0 -> 672 bytes
 core/i18n/example/nl_NL.po | 33 ++++++
 core/i18n/gettext.odin | 163 ++++++++++++++++++++++++++++
 core/i18n/i18n.odin | 116 ++++++++++++++++++++
 6 files changed, 406 insertions(+)
 create mode 100644 core/i18n/example/i18n_example.odin
 create mode 100644 core/i18n/example/messages.pot
 create mode 100644 core/i18n/example/nl_NL.mo
 create mode 100644 core/i18n/example/nl_NL.po
 create mode 100644 core/i18n/gettext.odin
 create mode 100644 core/i18n/i18n.odin

diff --git a/core/i18n/example/i18n_example.odin b/core/i18n/example/i18n_example.odin
new file mode 100644
index 000000000..f9fb2a353
--- /dev/null
+++ b/core/i18n/example/i18n_example.odin
@@ -0,0 +1,64 @@
+package i18n_example
+
+import "core:mem"
+import "core:fmt"
+import "core:i18n"
+
+LOC :: i18n.get
+
+_main :: proc() { // The actual example: load the Dutch catalog, then look up a handful of keys.
+	using fmt
+
+	err: i18n.Error
+
+	/*
+		Parse the embedded MO file and set it as the active translation so we can omit `get`'s "catalog" parameter.
+	*/
+	i18n.ACTIVE, err = i18n.parse_mo(#load("nl_NL.mo"))
+	defer i18n.destroy()
+
+	if err != .None { return }
+
+	/*
+		These keys are in the .MO catalog. The empty key is the header entry (`msgid ""` in nl_NL.po).
+	*/
+	println("-----")
+	println(LOC(""))
+	println("-----")
+	println(LOC("There are 69,105 leaves here."))
+	println("-----")
+	println(LOC("Hellope, World!"))
+
+	/*
+		For ease of use, pluralized lookup can use either the singular or the plural form as the key for the same translation.
+	*/
+	println("-----")
+	printf(LOC("There is %d leaf.\n", 1), 1)
+	printf(LOC("There is %d leaf.\n", 42), 42)
+
+	printf(LOC("There are %d leaves.\n", 1), 1)
+	printf(LOC("There are %d leaves.\n", 42), 42)
+
+	/*
+		This key isn't in the catalog, so the lookup hands back the key itself.
+	*/
+	println("-----")
+	println(LOC("Come visit us on Discord!"))
+}
+
+main :: proc() { // Runs `_main` under a tracking allocator and lists whatever is still allocated afterwards.
+	using fmt
+
+	track: mem.Tracking_Allocator
+	mem.tracking_allocator_init(&track, context.allocator)
+	context.allocator = mem.tracking_allocator(&track)
+
+	_main()
+
+	if len(track.allocation_map) > 0 {
+		println()
+		for _, v in track.allocation_map {
+			printf("%v Leaked %v bytes.\n", v.location, v.size)
+		}
+	}
+}
\ No newline at end of file
diff --git a/core/i18n/example/messages.pot b/core/i18n/example/messages.pot
new file mode 100644
index 000000000..53d521b6b
--- /dev/null
+++ b/core/i18n/example/messages.pot
@@ -0,0 +1,30 @@
+# Odin i18n Example
+# Copyright (C) 2021 Jeroen van Rijn
+# This file is distributed under the same license as the PACKAGE package.
+# Jeroen van Rijn , 2021.
+#
+#, fuzzy
+msgid ""
+msgstr "Project-Id-Version: Example 0.0.1\n"
+	"Report-Msgid-Bugs-To: Jeroen van Rijn \n"
+	"POT-Creation-Date: 2021-11-27 19:23+0100\n"
+	"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+	"Last-Translator: FULL NAME \n"
+	"Language: en-GB\n"
+	"MIME-Version: 1.0\n"
+	"Content-Type: text/plain; charset=UTF-8\n"
+	"Content-Transfer-Encoding: 8bit\n"
+
+#: i18n_example.odin:28
+msgid "There are 69,105 leaves here."
+msgstr "Er zijn hier 69.105 bladeren."
+
+#: i18n_example.odin:30
+msgid "Hellope, World!"
+msgstr "Hallo, Wereld!"
+ +#: i18n_example.odin:36 +msgid "There is %d leaf.\n" +msgid_plural "There are %d leaves.\n" +msgstr[0] "Er is %d blad.\n" +msgstr[1] "Er zijn %d bladeren.\n" \ No newline at end of file diff --git a/core/i18n/example/nl_NL.mo b/core/i18n/example/nl_NL.mo new file mode 100644 index 0000000000000000000000000000000000000000..0b1a668f4d225e8695e479d6135779288870ac39 GIT binary patch literal 672 zcmca7#4?qEfq{XAfq_AWfq}t>fq{XQfq@|gB*?(PP{6>zz|X+IP{_c*Aj`nO(8$2R zpuxbvFrR^ep@orw;RFK%10MqegD(?AK8A^bL6w1lp^AxtfsuiMff;HJ6Vwg{kJOx; z{DM>+h4B2MoD@Zdkc`x#RE5N%R0T6j9YX_Cg`CvHveaS)kgy(9HnUhkH3cM*rpE;p z0%?GVm8BNzaWMoGSt+DVDFkI^jtHkWr8fxbxABqwNfxLFf!CNG}JY+P%tpEGBwjSFf=gW z^7Zs}MYzUL&w$H0Kd&S;uS7SbvLMw;p(M4UM86;>F*DCvAvq(ls5rI6HZ;Ud*8-ug zC^4@%EwxD3H7_|oB{MJGO2Hy2vxLhhF)zI|F+Ei`BsDSDO2I!RGfx2~rT`M-iqLgW z%}XsxEXgmjQV7UTP01`#FxE5R@<}W%(G39^n*)(U3jnTwoYJDi99_5kqTFIDg}eeV zx7gOmS^>he)yPv&v{f+Fw1&IFN+B;N-p_}N!6PvzCtpV)Jhdnl6t%8J3RR#m&B#nG rQZTdB1I2DqPGU-GQEHwZgKLojBwj&6dR!nrShZ@30!Rd`h>HOL6VbdO literal 0 HcmV?d00001 diff --git a/core/i18n/example/nl_NL.po b/core/i18n/example/nl_NL.po new file mode 100644 index 000000000..1b8acbcc1 --- /dev/null +++ b/core/i18n/example/nl_NL.po @@ -0,0 +1,33 @@ +# Odin i18n Example +# Copyright (C) 2021 Jeroen van Rijn +# This file is distributed under the same license as the PACKAGE package. +# Jeroen van Rijn , 2021. +# +msgid "" +msgstr "" +"Project-Id-Version: Example 0.0.1\n" +"Report-Msgid-Bugs-To: Jeroen van Rijn \n" +"POT-Creation-Date: 2021-11-27 19:23+0100\n" +"PO-Revision-Date: 2021-11-28 02:56+0100\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Language-Team: Odin Language Team\n" +"X-Generator: Poedit 3.0\n" +"Last-Translator: Jeroen van Rijn\n" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" +"Language: nl_NL\n" + +#: i18n_example.odin:28 +msgid "There are 69,105 leaves here." +msgstr "Er zijn hier 69.105 bladeren." + +#: i18n_example.odin:30 +msgid "Hellope, World!" 
+msgstr "Hallo, Wereld!"
+
+#: i18n_example.odin:36
+msgid "There is %d leaf.\n"
+msgid_plural "There are %d leaves.\n"
+msgstr[0] "Er is %d blad.\n"
+msgstr[1] "Er zijn %d bladeren.\n"
diff --git a/core/i18n/gettext.odin b/core/i18n/gettext.odin
new file mode 100644
index 000000000..7918e217e
--- /dev/null
+++ b/core/i18n/gettext.odin
@@ -0,0 +1,182 @@
+package i18n
+/*
+	A parser for GNU GetText .MO files.
+
+	Copyright 2021 Jeroen van Rijn .
+	Made available under Odin's BSD-3 license.
+
+	A from-scratch implementation based on the specification found here:
+		https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+import "core:os"
+import "core:strings"
+import "core:bytes"
+
+/*
+	Parses an in-memory GNU GetText .MO catalog into a newly allocated `Translation`.
+
+	- `data`:       the complete contents of the .MO file, in either byte order.
+	- `pluralizer`: optional rule mapping a number to a plural slot; stored as-is on the result.
+	- `allocator`:  used for the `Translation`, its string interner and the temporaries made while parsing.
+
+	Keys and values are obtained from the interner rather than kept as slices of `data` (`parse_mo_file` frees its buffer right after parsing).
+	If an error is detected after the header has been accepted, the partially filled `translation`
+	is returned alongside the error, and the caller remains responsible for calling `destroy` on it.
+*/
+parse_mo_from_slice :: proc(data: []u8, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
+	context.allocator = allocator
+	/*
+		An MO file should have at least a 4-byte magic, a 4-byte format revision,
+		a 4-byte number of strings value, and 2 x 4-byte offsets.
+	*/
+	if len(data) < 20 {
+		return {}, .MO_File_Invalid
+	}
+
+	/*
+		Check magic. Should be 0x950412de in native Endianness; if it only matches byte-swapped, every later field is swapped too.
+	*/
+	native := true
+	magic := read_u32(data, native) or_return
+
+	if magic != 0x950412de {
+		native = false
+		magic = read_u32(data, native) or_return
+
+		if magic != 0x950412de { return {}, .MO_File_Invalid_Signature }
+	}
+
+	/*
+		The revision is one u32 in the file's byte order: major in the upper 16 bits, minor (which we can ignore) in the lower 16.
+	*/
+	revision := read_u32(data[4:], native) or_return
+	if (revision >> 16) > 1 { return {}, .MO_File_Unsupported_Version }
+
+	count := read_u32(data[ 8:], native) or_return
+	original_offset := read_u32(data[12:], native) or_return
+	translated_offset := read_u32(data[16:], native) or_return
+
+	if count == 0 { return {}, .Empty_Translation_Catalog }
+
+	/*
+		Initialize Translation, interner and optional pluralizer.
+	*/
+	translation = new(Translation)
+	translation.pluralize = pluralizer
+	strings.intern_init(&translation.intern, allocator, allocator)
+
+	for n := u32(0); n < count; n += 1 {
+		/*
+			Grab string's original length and offset.
+			Table positions are computed in 64 bits so that values from a malformed header can't wrap around the bounds checks.
+		*/
+		offset := u64(original_offset) + 8 * u64(n)
+		if u64(len(data)) < offset + 8 { return translation, .MO_File_Invalid }
+
+		o_length := read_u32(data[offset :], native) or_return
+		o_offset := read_u32(data[offset + 4:], native) or_return
+
+		offset = u64(translated_offset) + 8 * u64(n)
+		if u64(len(data)) < offset + 8 { return translation, .MO_File_Invalid }
+
+		t_length := read_u32(data[offset :], native) or_return
+		t_offset := read_u32(data[offset + 4:], native) or_return
+
+		max_offset := max(u64(o_offset) + u64(o_length), u64(t_offset) + u64(t_length)) + 1
+		if u64(len(data)) < max_offset { return translation, .Premature_EOF }
+
+		key := data[o_offset:][:o_length]
+		val := data[t_offset:][:t_length]
+
+		/*
+			Could be a pluralized string: the key then holds two NUL-separated forms (singular and plural),
+			while the value holds one NUL-separated form per plural slot of the target language, which need not be two.
+			The split results are temporaries; the defers release them at the end of every iteration, early returns included.
+		*/
+		zero := []byte{0}
+
+		keys := bytes.split(key, zero)
+		defer delete(keys)
+		vals := bytes.split(val, zero)
+		defer delete(vals)
+
+		if len(keys) > 2 || len(vals) > MAX_PLURALS {
+			return translation, .MO_File_Incorrect_Plural_Count
+		}
+
+		for k in keys {
+			interned_key := strings.intern_get(&translation.intern, string(k))
+
+			interned_vals: [MAX_PLURALS]string = {}
+			last_val: string
+
+			i := 0
+			for v in vals {
+				interned_vals[i] = strings.intern_get(&translation.intern, string(v))
+				last_val = interned_vals[i]
+				i += 1
+			}
+			// Pad the unused slots with the last form, so every slot index yields a usable string.
+			for ; i < MAX_PLURALS; i += 1 {
+				interned_vals[i] = last_val
+			}
+			translation.k_v[interned_key] = interned_vals
+		}
+	}
+	return
+}
+
+/*
+	Reads `filename` in full and parses it using `parse_mo_from_slice`, with the same `pluralizer` and `allocator`.
+	Returns `.File_Error` if the file couldn't be read. The file buffer is released before returning.
+*/
+parse_mo_file :: proc(filename: string, pluralizer: proc(int) -> int = nil, allocator := context.allocator) -> (translation: ^Translation, err: Error) {
+	context.allocator = allocator
+
+	data, data_ok := os.read_entire_file(filename)
+	defer delete(data)
+
+	if !data_ok { return {}, .File_Error }
+
+	return parse_mo_from_slice(data, pluralizer)
+}
+
+parse_mo :: proc { parse_mo_file, parse_mo_from_slice } // Overload set: a filename goes to parse_mo_file, a byte slice to parse_mo_from_slice.
+
+/*
+	Helpers. Read a u32/u16 from the start of `data`, byte-swapped when `native_endian` is false. Returns `.Premature_EOF` if `data` is too short.
+*/
+read_u32 :: proc(data: []u8, native_endian := true) -> (res: u32, err: Error) {
+	if len(data) < size_of(u32) { return 0, .Premature_EOF }
+
+	val := (^u32)(raw_data(data))^ // NOTE(review): loads through a cast pointer; confirm unaligned loads are acceptable on every target.
+
+	if native_endian {
+		return val, .None
+	} else {
+		when ODIN_ENDIAN == .Little {
+			return u32(transmute(u32be)val), .None
+		} else {
+			return u32(transmute(u32le)val), .None
+		}
+	}
+}
+
+read_u16 :: proc(data: []u8, native_endian := true) -> (res: u16, err: Error) {
+	if len(data) < size_of(u16) { return 0, .Premature_EOF }
+
+	val := (^u16)(raw_data(data))^ // NOTE(review): same alignment caveat as in read_u32.
+
+	if native_endian {
+		return val, .None
+	} else {
+		when ODIN_ENDIAN == .Little {
+			return u16(transmute(u16be)val), .None
+		} else {
+			return u16(transmute(u16le)val), .None
+		}
+	}
+}
\ No newline at end of file
diff --git a/core/i18n/i18n.odin b/core/i18n/i18n.odin
new file mode 100644
index 000000000..7c72f9858
--- /dev/null
+++ b/core/i18n/i18n.odin
@@ -0,0 +1,129 @@
+package i18n
+/*
+	Internationalization helpers.
+
+	Copyright 2021 Jeroen van Rijn .
+	Made available under Odin's BSD-3 license.
+
+	List of contributors:
+		Jeroen van Rijn: Initial implementation.
+*/
+import "core:strings"
+
+/*
+	TODO:
+	- Support for more translation catalog file formats.
+*/
+
+/*
+	Number of plural slots stored for every key.
+*/
+MAX_PLURALS :: 10
+
+/*
+	Currently active catalog. It is the default `catalog` argument of `get`, `get_by_slot` and `destroy`, and is nil until one is assigned.
+*/
+ACTIVE: ^Translation
+
+/*
+	The main data structure. This can be generated from various different file formats, as long as we have a parser for them.
+*/
+Translation :: struct {
+	k_v: map[string][MAX_PLURALS]string, // Key -> one translated string per plural slot.
+	intern: strings.Intern, // Interner the parser obtains every key and value string from.
+
+	pluralize: proc(number: int) -> int, // Optional number -> slot rule. When nil, `get` uses its built-in default rule.
+}
+
+Error :: enum {
+	/*
+		General return values.
+	*/
+	None = 0,
+	Empty_Translation_Catalog,
+
+	/*
+		Couldn't find, open or read file.
+	*/
+	File_Error,
+
+	/*
+		File too short.
+	*/
+	Premature_EOF,
+
+	/*
+		GNU Gettext *.MO file errors.
+	*/
+	MO_File_Invalid_Signature,
+	MO_File_Unsupported_Version,
+	MO_File_Invalid,
+	MO_File_Incorrect_Plural_Count,
+}
+
+/*
+	Several ways to use:
+	- get(key), which defaults to the singular form (number = 1) and i18n.ACTIVE catalog, or
+	- get(key, number), which returns the appropriate plural from the active catalog, or
+	- get(key, number, catalog) to grab text from a specific one.
+
+	Returns `key` itself when the catalog is nil or doesn't contain the key.
+*/
+get :: proc(key: string, number := 1, catalog: ^Translation = ACTIVE) -> (value: string) {
+	/*
+		No catalog has been loaded (yet). Bail out before we look at `catalog.pluralize`, which would dereference nil.
+	*/
+	if catalog == nil { return key }
+
+	/*
+		A lot of languages use singular for 1 item and plural for 0 or more than 1 items. This is our default pluralize rule.
+	*/
+	plural := 1 if number != 1 else 0
+
+	if catalog.pluralize != nil {
+		plural = catalog.pluralize(number)
+	}
+	return get_by_slot(key, plural, catalog)
+}
+
+/*
+	Several ways to use:
+	- get_by_slot(key), which defaults to the singular form and i18n.ACTIVE catalog, or
+	- get_by_slot(key, slot), which returns the requested plural from the active catalog, or
+	- get_by_slot(key, slot, catalog) to grab text from a specific one.
+
+	If a file format parser doesn't (yet) support plural slots, each of the slots will point at the same string.
+*/
+get_by_slot :: proc(key: string, slot := 0, catalog: ^Translation = ACTIVE) -> (value: string) {
+	if catalog == nil {
+		/*
+			Return the key if the catalog hasn't been initialized yet.
+		*/
+		return key
+	}
+
+	/*
+		Return the translation from the requested slot if this key is known, else return the key. Out-of-range slots are clamped to 0..MAX_PLURALS - 1.
+	*/
+	if translations, ok := catalog.k_v[key]; ok {
+		plural := min(max(0, slot), MAX_PLURALS - 1)
+		return translations[plural]
+	}
+	return key
+}
+
+/*
+	Same for destroy:
+	- destroy(), to clean up the currently active catalog i18n.ACTIVE
+	- destroy(catalog), to clean up a specific catalog.
+
+	Passing nil is a no-op. When the destroyed catalog is the active one, ACTIVE is reset to nil so later lookups fall back to returning the key.
+*/
+destroy :: proc(catalog: ^Translation = ACTIVE) {
+	if catalog != nil {
+		if catalog == ACTIVE { ACTIVE = nil }
+		strings.intern_destroy(&catalog.intern)
+		delete(catalog.k_v)
+		free(catalog)
+	}
+}
\ No newline at end of file