macOS: Set LANGUAGE env var based on macOS preferred language list

Sets the LANGUAGE environment variable based on the preferred languages
as reported by NSLocale.

macOS has a concept of preferred languages separate from the system
locale. The set of preferred languages is a list in priority order
of what translations the user prefers. A user can have, for example,
"fr_FR" as their locale but "en" as their preferred language. This would
mean that they want to use French units, date formats, etc. but they
prefer English translations.

gettext uses the LANGUAGE environment variable to override only
translations and a priority order can be specified by separating
the languages with colons. For example, "en:fr" would mean that
English translations are preferred but if they are not available
then French translations should be used.

To further complicate things, Apple reports the languages in BCP-47
format which is not compatible with gettext's POSIX locale format so
we have to canonicalize them. To canonicalize the languages we use
an internal function from libintl. That function isn't normally exported,
but since we compile libintl from source on macOS we can use it. This
canonicalization step isn't necessary on other platforms.
This commit is contained in:
Mitchell Hashimoto
2025-03-08 10:14:17 -08:00
parent e03e98e106
commit b48fcf33f7
3 changed files with 169 additions and 11 deletions

View File

@@ -81,6 +81,46 @@ pub fn _(msgid: [*:0]const u8) [*:0]const u8 {
return dgettext(build_config.bundle_id, msgid);
}
/// Convert a platform-specific locale name (for example a BCP-47 tag
/// such as "zh-Hans") into its POSIX form (for example "zh_CN").
///
/// The conversion itself is done by an unexported gnulib-lib function
/// inside gettext. That function rewrites its argument in place and
/// performs no bounds checking, so this wrapper validates the buffer
/// size up front. The extra work is acceptable because this is not a
/// hot path.
///
/// `buf` requirements:
///   * at least 16 bytes, which is enough for the longest hardcoded
///     locale name, and
///   * at least `locale.len + 1` bytes, so an unchanged locale plus
///     its null terminator still fits.
///
/// Returns `error.NoSpaceLeft` if `buf` does not satisfy both of the
/// above. On success the returned slice points into `buf`.
///
/// The macOS canonicalization logic lives here (other platforms have
/// their own logic in the same file):
///
/// https://github.com/coreutils/gnulib/blob/5b92dd0a45c8d27f13a21076b57095ea5e220870/lib/localename.c#L1171
pub fn canonicalizeLocale(
    buf: []u8,
    locale: []const u8,
) error{NoSpaceLeft}![:0]const u8 {
    // Reject buffers that are too small for either the longest
    // hardcoded name or the input plus its terminator.
    const min_len = @max(16, locale.len + 1);
    if (buf.len < min_len) return error.NoSpaceLeft;

    // The C function works in place on a null-terminated string, so
    // stage the input at the front of the caller's buffer.
    const input_len = locale.len;
    @memcpy(buf[0..input_len], locale);
    buf[input_len] = 0;
    _libintl_locale_name_canonicalize(buf[0..input_len :0]);

    // The result may have a different length than the input. Locate
    // the terminator and reslice from `buf` so the returned slice
    // carries the sentinel in its type.
    const result_len = std.mem.sliceTo(buf, 0).len;
    return buf[0..result_len :0];
}
/// This can be called at any point a compile-time-known locale is
/// available. This will use comptime to verify the locale is supported.
pub fn staticLocale(comptime v: [*:0]const u8) [*:0]const u8 {
@@ -100,3 +140,23 @@ pub fn staticLocale(comptime v: [*:0]const u8) [*:0]const u8 {
extern fn bindtextdomain(domainname: [*:0]const u8, dirname: [*:0]const u8) ?[*:0]const u8;
extern fn textdomain(domainname: [*:0]const u8) ?[*:0]const u8;
extern fn dgettext(domainname: [*:0]const u8, msgid: [*:0]const u8) [*:0]const u8;
// Internal libintl routine that canonicalizes a locale name. It is only
// available if we're building libintl from source since it's otherwise
// not exported. We only need it on macOS currently but probably will on
// Windows as well.
//
// `name` is a mutable, null-terminated buffer; canonicalizeLocale relies
// on the result being written back into that same buffer.
extern fn _libintl_locale_name_canonicalize(name: [*:0]u8) void;
test "canonicalizeLocale darwin" {
    // The expected values below are specific to the macOS rules.
    if (!builtin.target.isDarwin()) return error.SkipZigTest;

    const Case = struct {
        input: []const u8,
        expected: []const u8,
    };
    const cases = [_]Case{
        .{ .input = "en_US", .expected = "en_US" },
        .{ .input = "zh-Hans", .expected = "zh_CN" },
        .{ .input = "zh-Hant", .expected = "zh_TW" },

        // Edge case we want to stay aware of: canonicalizeLocale does
        // not understand encodings and turns the dash in them into an
        // underscore. Callers should strip the encoding before calling.
        .{ .input = "en_US.UTF-8", .expected = "en_US.UTF_8" },
    };

    var scratch: [256]u8 = undefined;
    for (cases) |case| {
        const actual = try canonicalizeLocale(&scratch, case.input);
        try std.testing.expectEqualStrings(case.expected, actual);
    }
}

View File

@@ -91,19 +91,113 @@ fn setLangFromCocoa() void {
const z_lang = std.mem.sliceTo(c_lang, 0);
const z_country = std.mem.sliceTo(c_country, 0);
// Format them into a buffer
var buf: [128]u8 = undefined;
const env_value = std.fmt.bufPrintZ(&buf, "{s}_{s}.UTF-8", .{ z_lang, z_country }) catch |err| {
log.warn("error setting locale from system. err={}", .{err});
return;
};
log.info("detected system locale={s}", .{env_value});
// Format our locale as "<lang>_<country>.UTF-8" and set it as LANG.
{
var buf: [128]u8 = undefined;
const env_value = std.fmt.bufPrintZ(&buf, "{s}_{s}.UTF-8", .{ z_lang, z_country }) catch |err| {
log.warn("error setting locale from system. err={}", .{err});
return;
};
log.info("detected system locale={s}", .{env_value});
// Set it onto our environment
if (internal_os.setenv("LANG", env_value) < 0) {
log.warn("error setting locale env var", .{});
return;
// Set it onto our environment
if (internal_os.setenv("LANG", env_value) < 0) {
log.warn("error setting locale env var", .{});
return;
}
}
// Get our preferred languages and set that to the LANGUAGE
// env var in case our language differs from our locale.
var buf: [1024]u8 = undefined;
if (preferredLanguageFromCocoa(&buf, NSLocale)) |pref_| {
if (pref_) |pref| {
log.debug(
"setting LANGUAGE from preferred languages value={s}",
.{pref},
);
_ = internal_os.setenv("LANGUAGE", pref);
}
} else |err| {
log.warn("error getting preferred languages. err={}", .{err});
}
}
/// Build the value for the LANGUAGE environment variable from the
/// preferred languages reported by NSLocale.
///
/// macOS tracks preferred languages separately from the system locale.
/// The preferred languages are a priority-ordered list of the
/// translations the user wants. For example, a user may have "fr_FR" as
/// their locale but "en" as their preferred language: French units and
/// date formats, but English translations.
///
/// gettext reads LANGUAGE to override only translations. Several
/// languages can be listed in priority order separated by colons, so
/// "en:fr" means "prefer English, fall back to French".
///
/// Apple reports languages in BCP-47 format, which is not compatible
/// with gettext's POSIX locale format, so every entry is canonicalized
/// before being written.
///
/// The result is written into `buf` as colon-separated entries, each
/// suffixed with ".UTF-8". Returns null when no language was written,
/// and `error.NoSpaceLeft` when `buf` is too small.
fn preferredLanguageFromCocoa(
    buf: []u8,
    NSLocale: objc.Class,
) error{NoSpaceLeft}!?[:0]const u8 {
    var stream = std.io.fixedBufferStream(buf);
    const out = stream.writer();

    // Ask for the app's preferred languages. These may not match the
    // system locale (NSLocale.currentLocale).
    const ns_languages = NSLocale.msgSend(
        objc.Object,
        objc.sel("preferredLanguages"),
        .{},
    );
    const languages: *macos.foundation.Array = @ptrCast(ns_languages.value);

    for (0..languages.getCount()) |idx| {
        var scratch: [255:0]u8 = undefined;
        const ns_str = languages.getValueAtIndex(macos.foundation.String, idx);
        const lang = ns_str.cstring(&scratch, .utf8) orelse {
            // I don't think this can happen but if it does then I want
            // to know about it if a user has translation issues.
            log.warn("failed to convert a preferred language to UTF-8", .{});
            continue;
        };

        // Every entry after the first is preceded by a colon.
        if (stream.pos > 0) {
            out.writeByte(':') catch return error.NoSpaceLeft;
        }

        // Canonicalize from BCP-47 to POSIX directly into the unused
        // tail of the output buffer, then advance past what was
        // produced.
        const tail = stream.buffer[stream.pos..];
        const posix = try i18n.canonicalizeLocale(tail, lang);
        stream.seekBy(@intCast(posix.len)) catch unreachable;

        // The canonicalized locale never contains the encoding and
        // all of our translations require UTF-8 so we add that.
        out.writeAll(".UTF-8") catch return error.NoSpaceLeft;
    }

    // Nothing written means there were no usable preferred languages.
    if (stream.pos == 0) return null;

    // Terminate the string, then hand back everything before the
    // terminator as a sentinel-terminated slice.
    out.writeByte(0) catch return error.NoSpaceLeft;
    const written = stream.getWritten();
    return written[0 .. written.len - 1 :0];
}
const LC_ALL: c_int = 6; // from locale.h

View File

@@ -52,3 +52,7 @@ pub const OpenType = openpkg.Type;
pub const pipe = pipepkg.pipe;
pub const resourcesDir = resourcesdir.resourcesDir;
pub const ShellEscapeWriter = shell.ShellEscapeWriter;
// Reference the i18n namespace from a test block; presumably this is so
// the tests declared in i18n are picked up when this file's tests run.
test {
_ = i18n;
}