diff --git a/src/os/i18n.zig b/src/os/i18n.zig
index 69b222da5..dc72d3f5d 100644
--- a/src/os/i18n.zig
+++ b/src/os/i18n.zig
@@ -81,6 +81,46 @@ pub fn _(msgid: [*:0]const u8) [*:0]const u8 {
     return dgettext(build_config.bundle_id, msgid);
 }
 
+/// Canonicalize a locale name from a platform-specific value to
+/// a POSIX-compliant value. This is a thin layer over the unexported
+/// gnulib-lib function in gettext that does this already.
+///
+/// The gnulib-lib function modifies the buffer in place but has
+/// zero bounds checking, so we do a bit extra to ensure we don't
+/// overflow the buffer. This is likely slightly more expensive but
+/// this isn't a hot path so it should be fine.
+///
+/// The buffer must be at least 16 bytes long. This ensures we can
+/// fit the longest possible hardcoded locale name. Additionally,
+/// it must hold locale plus its null terminator in case the locale
+/// is unchanged. Otherwise error.NoSpaceLeft is returned.
+///
+/// Here is the logic for macOS, but other platforms also have
+/// their own canonicalization logic:
+///
+/// https://github.com/coreutils/gnulib/blob/5b92dd0a45c8d27f13a21076b57095ea5e220870/lib/localename.c#L1171
+pub fn canonicalizeLocale(
+    buf: []u8,
+    locale: []const u8,
+) error{NoSpaceLeft}![:0]const u8 {
+    // Buffer must fit 16 bytes and also the locale plus its null term
+    if (buf.len < @max(16, locale.len + 1)) return error.NoSpaceLeft;
+
+    // Copy our locale into the buffer since it modifies in place.
+    // This must be null-terminated.
+    @memcpy(buf[0..locale.len], locale);
+    buf[locale.len] = 0;
+
+    _libintl_locale_name_canonicalize(buf[0..locale.len :0]);
+
+    // Convert the null-terminated result buffer into a slice. We
+    // need to search for the null terminator and slice it back.
+    // We have to reslice `buf` since `slice` excludes the null. The
+    // returned slice points into `buf`.
+    const slice = std.mem.sliceTo(buf, 0);
+    return buf[0..slice.len :0];
+}
+
 /// This can be called at any point a compile-time-known locale is
 /// available. This will use comptime to verify the locale is supported.
 pub fn staticLocale(comptime v: [*:0]const u8) [*:0]const u8 {
@@ -100,3 +140,23 @@ pub fn staticLocale(comptime v: [*:0]const u8) [*:0]const u8 {
 extern fn bindtextdomain(domainname: [*:0]const u8, dirname: [*:0]const u8) ?[*:0]const u8;
 extern fn textdomain(domainname: [*:0]const u8) ?[*:0]const u8;
 extern fn dgettext(domainname: [*:0]const u8, msgid: [*:0]const u8) [*:0]const u8;
+
+// This is only available if we're building libintl from source
+// since it's otherwise not exported. We only need it on macOS
+// currently but probably will on Windows as well.
+extern fn _libintl_locale_name_canonicalize(name: [*:0]u8) void;
+
+test "canonicalizeLocale darwin" {
+    if (!builtin.target.isDarwin()) return error.SkipZigTest;
+
+    const testing = std.testing;
+    var buf: [256]u8 = undefined;
+    try testing.expectEqualStrings("en_US", try canonicalizeLocale(&buf, "en_US"));
+    try testing.expectEqualStrings("zh_CN", try canonicalizeLocale(&buf, "zh-Hans"));
+    try testing.expectEqualStrings("zh_TW", try canonicalizeLocale(&buf, "zh-Hant"));
+
+    // This is just an edge case I want to make sure we're aware of:
+    // canonicalizeLocale does not handle encodings and will turn hyphens
+    // in them into underscores. Parse them out before calling this function.
+    try testing.expectEqualStrings("en_US.UTF_8", try canonicalizeLocale(&buf, "en_US.UTF-8"));
+}
diff --git a/src/os/locale.zig b/src/os/locale.zig
index 840687143..473e50399 100644
--- a/src/os/locale.zig
+++ b/src/os/locale.zig
@@ -91,19 +91,113 @@ fn setLangFromCocoa() void {
     const z_lang = std.mem.sliceTo(c_lang, 0);
     const z_country = std.mem.sliceTo(c_country, 0);
 
-    // Format them into a buffer
-    var buf: [128]u8 = undefined;
-    const env_value = std.fmt.bufPrintZ(&buf, "{s}_{s}.UTF-8", .{ z_lang, z_country }) catch |err| {
-        log.warn("error setting locale from system. err={}", .{err});
-        return;
-    };
-    log.info("detected system locale={s}", .{env_value});
+    // Format our locale as "<lang>_<country>.UTF-8" and set it as LANG.
+    {
+        var buf: [128]u8 = undefined;
+        const env_value = std.fmt.bufPrintZ(&buf, "{s}_{s}.UTF-8", .{ z_lang, z_country }) catch |err| {
+            log.warn("error setting locale from system. err={}", .{err});
+            return;
+        };
+        log.info("detected system locale={s}", .{env_value});
 
-    // Set it onto our environment
-    if (internal_os.setenv("LANG", env_value) < 0) {
-        log.warn("error setting locale env var", .{});
-        return;
+        // Set it onto our environment
+        if (internal_os.setenv("LANG", env_value) < 0) {
+            log.warn("error setting locale env var", .{});
+            return;
+        }
     }
+
+    // Get our preferred languages and set that to the LANGUAGE
+    // env var in case our language differs from our locale.
+    var buf: [1024]u8 = undefined;
+    if (preferredLanguageFromCocoa(&buf, NSLocale)) |pref_| {
+        if (pref_) |pref| {
+            log.debug(
+                "setting LANGUAGE from preferred languages value={s}",
+                .{pref},
+            );
+            _ = internal_os.setenv("LANGUAGE", pref);
+        }
+    } else |err| {
+        log.warn("error getting preferred languages. err={}", .{err});
+    }
+}
+
+/// Builds the value for the LANGUAGE environment variable from the
+/// preferred languages as reported by NSLocale.
+///
+/// macOS has a concept of preferred languages separate from the system
+/// locale. The set of preferred languages is a list in priority order
+/// of what translations the user prefers. A user can have, for example,
+/// "fr_FR" as their locale but "en" as their preferred language. This would
+/// mean that they want to use French units, date formats, etc. but they
+/// prefer English translations.
+///
+/// gettext uses the LANGUAGE environment variable to override only
+/// translations and a priority order can be specified by separating
+/// the languages with colons. For example, "en:fr" would mean that
+/// English translations are preferred but if they are not available
+/// then French translations should be used.
+///
+/// Apple reports the languages in BCP-47 format which is not compatible
+/// with gettext's POSIX locale format so we canonicalize each one. The
+/// result is built in buf; null is returned if no language was written.
+fn preferredLanguageFromCocoa(
+    buf: []u8,
+    NSLocale: objc.Class,
+) error{NoSpaceLeft}!?[:0]const u8 {
+    var fbs = std.io.fixedBufferStream(buf);
+    const writer = fbs.writer();
+
+    // We need to get our app's preferred languages. These may not
+    // match the system locale (NSLocale.currentLocale).
+    const preferred: *macos.foundation.Array = array: {
+        const ns = NSLocale.msgSend(
+            objc.Object,
+            objc.sel("preferredLanguages"),
+            .{},
+        );
+        break :array @ptrCast(ns.value);
+    };
+    for (0..preferred.getCount()) |i| {
+        var str_buf: [255:0]u8 = undefined;
+        const str = preferred.getValueAtIndex(macos.foundation.String, i);
+        const c_str = str.cstring(&str_buf, .utf8) orelse {
+            // I don't think this can happen but if it does then I want
+            // to know about it if a user has translation issues.
+            log.warn("failed to convert a preferred language to UTF-8", .{});
+            continue;
+        };
+
+        // Append our separator if we have any previous languages
+        if (fbs.pos > 0) {
+            _ = writer.writeByte(':') catch
+                return error.NoSpaceLeft;
+        }
+
+        // Apple languages are in BCP-47 format, and we need to
+        // canonicalize them to the POSIX format.
+        const canon = try i18n.canonicalizeLocale(
+            fbs.buffer[fbs.pos..],
+            c_str,
+        );
+        fbs.seekBy(@intCast(canon.len)) catch unreachable;
+
+        // The canonicalized locale never contains the encoding and
+        // all of our translations require UTF-8 so we add that.
+        _ = writer.writeAll(".UTF-8") catch return error.NoSpaceLeft;
+    }
+
+    // If we had no preferred languages then we return nothing.
+    if (fbs.pos == 0) return null;
+
+    // Null terminate it
+    _ = writer.writeByte(0) catch return error.NoSpaceLeft;
+
+    // Get our slice. It includes the null byte we just wrote so we
+    // reslice it to exclude that byte and mark it as the sentinel.
+    const slice = fbs.getWritten();
+    return slice[0 .. slice.len - 1 :0];
 }
 
 const LC_ALL: c_int = 6; // from locale.h
diff --git a/src/os/main.zig b/src/os/main.zig
index 7c961ea13..36833f427 100644
--- a/src/os/main.zig
+++ b/src/os/main.zig
@@ -52,3 +52,7 @@ pub const OpenType = openpkg.Type;
 pub const pipe = pipepkg.pipe;
 pub const resourcesDir = resourcesdir.resourcesDir;
 pub const ShellEscapeWriter = shell.ShellEscapeWriter;
+
+test {
+    _ = i18n;
+}