diff --git a/lib/pure/includes/osenv.nim b/lib/pure/includes/osenv.nim
index 4a776eb787..e04e14cec1 100644
--- a/lib/pure/includes/osenv.nim
+++ b/lib/pure/includes/osenv.nim
@@ -42,14 +42,18 @@ when not defined(nimscript):
 
   else:
 
-    proc c_getenv(env: cstring): cstring {.
-      importc: "getenv", header: "<stdlib.h>".}
     when defined(windows):
       proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
       from std/private/win_setenv import setEnvImpl
+      proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv",
+          header: "<stdlib.h>".}
+      proc getEnvImpl(env: cstring): WideCString = c_wgetenv(env.newWideCString)
     else:
+      proc c_getenv(env: cstring): cstring {.
+        importc: "getenv", header: "<stdlib.h>".}
       proc c_setenv(envname: cstring, envval: cstring, overwrite: cint): cint {.importc: "setenv", header: "<stdlib.h>".}
-    proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
+      proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
+      proc getEnvImpl(env: cstring): cstring = c_getenv(env)
 
     proc getEnv*(key: string, default = ""): string {.tags: [ReadEnvEffect].} =
       ## Returns the value of the `environment variable`:idx: named `key`.
@@ -67,7 +71,7 @@ when not defined(nimscript):
         assert getEnv("unknownEnv") == ""
         assert getEnv("unknownEnv", "doesn't exist") == "doesn't exist"
 
-      let env = c_getenv(key)
+      let env = getEnvImpl(key)
       if env == nil: return default
       result = $env
 
@@ -83,7 +87,7 @@ when not defined(nimscript):
       runnableExamples:
         assert not existsEnv("unknownEnv")
 
-      return c_getenv(key) != nil
+      return getEnvImpl(key) != nil
 
     proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} =
       ## Sets the value of the `environment variable`:idx: named `key` to `val`.
diff --git a/lib/std/envvars.nim b/lib/std/envvars.nim
index 5b135cbd3c..d7706c17d1 100644
--- a/lib/std/envvars.nim
+++ b/lib/std/envvars.nim
@@ -57,15 +57,19 @@ when not defined(nimscript):
 
   else:
 
-    proc c_getenv(env: cstring): cstring {.
-      importc: "getenv", header: "<stdlib.h>".}
     when defined(windows):
       proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
       from std/private/win_setenv import setEnvImpl
       import winlean
+      proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv",
+          header: "<stdlib.h>".}
+      proc getEnvImpl(env: cstring): WideCString = c_wgetenv(env.newWideCString)
     else:
+      proc c_getenv(env: cstring): cstring {.
+        importc: "getenv", header: "<stdlib.h>".}
       proc c_setenv(envname: cstring, envval: cstring, overwrite: cint): cint {.importc: "setenv", header: "<stdlib.h>".}
       proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
+      proc getEnvImpl(env: cstring): cstring = c_getenv(env)
 
     proc getEnv*(key: string, default = ""): string {.tags: [ReadEnvEffect].} =
       ## Returns the value of the `environment variable`:idx: named `key`.
@@ -83,7 +87,7 @@ when not defined(nimscript):
         assert getEnv("unknownEnv") == ""
         assert getEnv("unknownEnv", "doesn't exist") == "doesn't exist"
 
-      let env = c_getenv(key)
+      let env = getEnvImpl(key)
       if env == nil: return default
       result = $env
 
@@ -99,7 +103,7 @@ when not defined(nimscript):
       runnableExamples:
         assert not existsEnv("unknownEnv")
 
-      return c_getenv(key) != nil
+      return getEnvImpl(key) != nil
 
     proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} =
       ## Sets the value of the `environment variable`:idx: named `key` to `val`.
diff --git a/lib/std/private/win_setenv.nim b/lib/std/private/win_setenv.nim
index 0dfe0ed46d..4b093e6f7b 100644
--- a/lib/std/private/win_setenv.nim
+++ b/lib/std/private/win_setenv.nim
@@ -25,27 +25,28 @@ when not defined(windows): discard
 else:
   type wchar_t {.importc: "wchar_t".} = int16
 
-  proc setEnvironmentVariableA*(lpName, lpValue: cstring): int32 {.
-    stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableA", sideEffect.}
-    # same as winlean.setEnvironmentVariableA
+  proc setEnvironmentVariableW*(lpName, lpValue: WideCString): int32 {.
+    stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableW", sideEffect.}
+    # same as winlean.setEnvironmentVariableW
 
-  proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
-  proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
-  proc c_wgetenv(varname: ptr wchar_t): ptr wchar_t {.importc: "_wgetenv", header: "<stdlib.h>".}
+  proc c_getenv(varname: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
+  proc c_wputenv(envstring: WideCString): cint {.importc: "_wputenv", header: "<stdlib.h>".}
+  proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
 
   var errno {.importc, header: "<errno.h>".}: cint
-  var gWenviron {.importc: "_wenviron".}: ptr ptr wchar_t
+  var genviron {.importc: "_environ".}: ptr ptr char
     # xxx `ptr UncheckedArray[WideCString]` did not work
 
-  proc mbstowcs(wcstr: ptr wchar_t, mbstr: cstring, count: csize_t): csize_t {.importc: "mbstowcs", header: "<stdlib.h>".}
+  proc wcstombs(wcstr: ptr char, mbstr: WideCString, count: csize_t): csize_t {.importc, header: "<stdlib.h>".}
     # xxx cint vs errno_t?
 
   proc setEnvImpl*(name: string, value: string, overwrite: cint): cint =
     const EINVAL = cint(22)
-    if overwrite == 0 and c_getenv(cstring(name)) != nil: return 0
+    let wideName = name.newWideCString
+    if overwrite == 0 and c_wgetenv(wideName) != nil: return 0
     if value != "":
       let envstring = name & "=" & value
-      let e = c_putenv(cstring(envstring))
+      let e = c_wputenv(envstring.newWideCString)
       if e != 0:
         errno = EINVAL
         return -1
@@ -57,40 +58,44 @@ else:
     so we have to do these terrible things.
     ]#
     let envstring = name & "= "
-    if c_putenv(cstring(envstring)) != 0:
+    if c_wputenv(newWideCString(envstring)) != 0:
       errno = EINVAL
       return -1
-    # Here lies the documentation we blatently ignore to make this work.
-    var s = c_getenv(cstring(name))
-    s[0] = '\0'
+    # Here lies the documentation we blatantly ignore to make this work.
+    var s = c_wgetenv(wideName)
+    s[0] = '\0'.Utf16Char
     #[
     This would result in a double null termination, which normally signifies the
     end of the environment variable list, so we stick a completely empty
     environment variable into the list instead.
     ]#
-    s = c_getenv(cstring(name))
-    s[1] = '='
+    s = c_wgetenv(wideName)
+    s[1] = '='.Utf16Char
     #[
-    If gWenviron is null, the wide environment has not been initialized
+    If genviron is null, the MBCS environment has not been initialized
     yet, and we don't need to try to update it. We have to do this otherwise
-    we'd be forcing the initialization and maintenance of the wide environment
+    we'd be forcing the initialization and maintenance of the MBCS environment
     even though it's never actually used in most programs.
     ]#
-    if gWenviron != nil:
-      # var buf: array[MAX_ENV + 1, WideCString]
-      let requiredSize = mbstowcs(nil, cstring(name), 0).int
-      var buf = newSeq[Utf16Char](requiredSize + 1)
-      let buf2 = cast[ptr wchar_t](buf[0].addr)
-      if mbstowcs(buf2, cstring(name), csize_t(requiredSize + 1)) == csize_t(high(uint)):
-        errno = EINVAL
-        return -1
-      var ptrToEnv = cast[WideCString](c_wgetenv(buf2))
-      ptrToEnv[0] = '\0'.Utf16Char
-      ptrToEnv = cast[WideCString](c_wgetenv(buf2))
-      ptrToEnv[1] = '='.Utf16Char
+    if genviron != nil:
+      # `wcstombs` returns `high(csize_t)` when the current codepage cannot
+      # represent a character in `wideName`. Test for that sentinel before
+      # converting the size to `int`, rather than catching the resulting
+      # `RangeDefect`: defects signal bugs and are not catchable with
+      # `--panics:on`. On failure, skip updating the MBCS environment.
+      let requiredSize = wcstombs(nil, wideName, 0)
+      if requiredSize != high(csize_t):
+        var buf = newSeq[char](requiredSize.int + 1)
+        let buf2 = buf[0].addr
+        # A failure of the second conversion is treated the same way.
+        if wcstombs(buf2, wideName, requiredSize + 1) != high(csize_t):
+          var ptrToEnv = c_getenv(buf2)
+          ptrToEnv[0] = '\0'
+          ptrToEnv = c_getenv(buf2)
+          ptrToEnv[1] = '='
 
     # And now, we have to update the outer environment to have a proper empty value.
-    if setEnvironmentVariableA(cstring(name), cstring(value)) == 0:
+    if setEnvironmentVariableW(wideName, value.newWideCString) == 0:
       errno = EINVAL
       return -1
     return 0
diff --git a/tests/stdlib/tenvvars.nim b/tests/stdlib/tenvvars.nim
index 406aa3230b..ce8a7a1eb2 100644
--- a/tests/stdlib/tenvvars.nim
+++ b/tests/stdlib/tenvvars.nim
@@ -8,9 +8,12 @@ import std/envvars
 from std/sequtils import toSeq
 import stdtest/testutils
 
+# "LATIN CAPITAL LETTER AE" in UTF-8 (0xc386)
+const unicodeUtf8 = "\xc3\x86"
+
 template main =
   block: # delEnv, existsEnv, getEnv, envPairs
-    for val in ["val", ""]: # ensures empty val works too
+    for val in ["val", "", unicodeUtf8]: # ensures empty val works too
       const key = "NIM_TESTS_TOSENV_KEY"
       doAssert not existsEnv(key)
 
@@ -44,9 +47,12 @@ template main =
 
 main()
 
+proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
+proc c_wgetenv(env: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
+proc c_wputenv(env: WideCString): int32 {.importc: "_wputenv", header: "<stdlib.h>".}
+
 when not defined(js) and not defined(nimscript):
   block: # bug #18533
-    proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
     var thr: Thread[void]
     proc threadFunc {.thread.} = putEnv("foo", "fooVal2")
 
@@ -54,6 +60,86 @@ when not defined(js) and not defined(nimscript):
     doAssert getEnv("foo") == "fooVal1"
     createThread(thr, threadFunc)
     joinThreads(thr)
-    doAssert getEnv("foo") == $c_getenv("foo")
+    # `_wgetenv` is specific to the Windows CRT; use `getenv` elsewhere.
+    when defined(windows):
+      doAssert getEnv("foo") == $c_wgetenv("foo".newWideCString)
+    else:
+      doAssert getEnv("foo") == $c_getenv("foo")
 
     doAssertRaises(OSError): delEnv("foo=bar")
+
+when defined(windows):
+  const
+    LC_ALL = 0
+    unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding
+
+  proc setlocale(category: cint, locale: cstring): cstring {.importc, header: "<locale.h>".}
+
+  # Set locale required to represent `unicodeAnsi`
+  discard setlocale(LC_ALL, cstring"English_United States.1252")
+
+  block: # Feature #xxx
+    # These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode
+    # characters correctly. This means that module X in the process calling the
+    # CRT environment variable API will get the correct string. Raw CRT API
+    # calls below represent module X.
+
+    # Getting an env. var. with unicode characters returns the correct UTF-8
+    # encoded string.
+    block:
+      const envName = "twin_envvars1"
+      doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
+      doAssert existsEnv(envName)
+      doAssert getEnv(envName) == unicodeUtf8
+
+    # Putting an env. var. with unicode characters gives the correct UTF-16
+    # encoded string from low-level routine.
+    block:
+      const envName = "twin_envvars2"
+      putEnv(envName, unicodeUtf8)
+      doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
+      doAssert $c_getenv(envName) == unicodeAnsi
+
+    # Env. name containing Unicode characters is retrieved correctly
+    block:
+      const envName = unicodeUtf8 & "1"
+      doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
+      doAssert existsEnv(envName)
+      doAssert getEnv(envName) == unicodeUtf8
+
+    # Env. name containing Unicode characters is set correctly
+    block:
+      const
+        envName = unicodeUtf8 & "2"
+        envNameAnsi = unicodeAnsi & "2"
+      putEnv(envName, unicodeUtf8)
+      doAssert existsEnv(envName)
+      doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
+      doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi
+
+    # Env. name containing Unicode characters and empty value is set correctly
+    block:
+      const
+        envName = unicodeUtf8 & "3"
+        envNameAnsi = unicodeAnsi & "3"
+      putEnv(envName, "")
+      doAssert existsEnv(envName)
+      doAssert $c_wgetenv(envName.newWideCString) == ""
+      doAssert $c_getenv(envNameAnsi.cstring) == ""
+
+    # Env. name containing Unicode characters and empty value is set correctly;
+    # and, if env. name. characters cannot be represented in codepage, don't
+    # raise an error.
+    #
+    # `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The
+    # Polish_Poland.1250 locale has no representation of `unicodeUtf8`, so the
+    # conversion will fail, but this must not be fatal. It is expected that the
+    # routine ignores updating MBCS environment (`environ` global) and carries
+    # on.
+    block:
+      const envName = unicodeUtf8 & "4"
+      discard setlocale(LC_ALL, cstring"Polish_Poland.1250")
+      putEnv(envName, "")
+      doAssert existsEnv(envName)
+      doAssert $c_wgetenv(envName.newWideCString) == ""
+      doAssert getEnv(envName) == ""
diff --git a/tests/stdlib/tosenv.nim b/tests/stdlib/tosenv.nim
index 0a50031a11..4d0acff391 100644
--- a/tests/stdlib/tosenv.nim
+++ b/tests/stdlib/tosenv.nim
@@ -8,9 +8,12 @@ import std/os
 from std/sequtils import toSeq
 import stdtest/testutils
 
+# "LATIN CAPITAL LETTER AE" in UTF-8 (0xc386)
+const unicodeUtf8 = "\xc3\x86"
+
 template main =
   block: # delEnv, existsEnv, getEnv, envPairs
-    for val in ["val", ""]: # ensures empty val works too
+    for val in ["val", "", unicodeUtf8]: # ensures empty val works too
       const key = "NIM_TESTS_TOSENV_KEY"
       doAssert not existsEnv(key)
 
@@ -45,9 +48,12 @@ template main =
 static: main()
 main()
 
+proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
+proc c_wgetenv(env: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
+proc c_wputenv(env: WideCString): int32 {.importc: "_wputenv", header: "<stdlib.h>".}
+
 when not defined(js) and not defined(nimscript):
   block: # bug #18533
-    proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
     var thr: Thread[void]
     proc threadFunc {.thread.} = putEnv("foo", "fooVal2")
 
@@ -55,6 +61,86 @@ when not defined(js) and not defined(nimscript):
     doAssert getEnv("foo") == "fooVal1"
     createThread(thr, threadFunc)
     joinThreads(thr)
-    doAssert getEnv("foo") == $c_getenv("foo")
+    # `_wgetenv` is specific to the Windows CRT; use `getenv` elsewhere.
+    when defined(windows):
+      doAssert getEnv("foo") == $c_wgetenv("foo".newWideCString)
+    else:
+      doAssert getEnv("foo") == $c_getenv("foo")
 
     doAssertRaises(OSError): delEnv("foo=bar")
+
+when defined(windows):
+  const
+    LC_ALL = 0
+    unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding
+
+  proc setlocale(category: cint, locale: cstring): cstring {.importc, header: "<locale.h>".}
+
+  # Set locale required to represent `unicodeAnsi`
+  discard setlocale(LC_ALL, cstring"English_United States.1252")
+
+  block: # Feature #xxx
+    # These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode
+    # characters correctly. This means that module X in the process calling the
+    # CRT environment variable API will get the correct string. Raw CRT API
+    # calls below represent module X.
+
+    # Getting an env. var. with unicode characters returns the correct UTF-8
+    # encoded string.
+    block:
+      const envName = "twin_envvars1"
+      doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
+      doAssert existsEnv(envName)
+      doAssert getEnv(envName) == unicodeUtf8
+
+    # Putting an env. var. with unicode characters gives the correct UTF-16
+    # encoded string from low-level routine.
+    block:
+      const envName = "twin_envvars2"
+      putEnv(envName, unicodeUtf8)
+      doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
+      doAssert $c_getenv(envName) == unicodeAnsi
+
+    # Env. name containing Unicode characters is retrieved correctly
+    block:
+      const envName = unicodeUtf8 & "1"
+      doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
+      doAssert existsEnv(envName)
+      doAssert getEnv(envName) == unicodeUtf8
+
+    # Env. name containing Unicode characters is set correctly
+    block:
+      const
+        envName = unicodeUtf8 & "2"
+        envNameAnsi = unicodeAnsi & "2"
+      putEnv(envName, unicodeUtf8)
+      doAssert existsEnv(envName)
+      doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
+      doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi
+
+    # Env. name containing Unicode characters and empty value is set correctly
+    block:
+      const
+        envName = unicodeUtf8 & "3"
+        envNameAnsi = unicodeAnsi & "3"
+      putEnv(envName, "")
+      doAssert existsEnv(envName)
+      doAssert $c_wgetenv(envName.newWideCString) == ""
+      doAssert $c_getenv(envNameAnsi.cstring) == ""
+
+    # Env. name containing Unicode characters and empty value is set correctly;
+    # and, if env. name. characters cannot be represented in codepage, don't
+    # raise an error.
+    #
+    # `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The
+    # Polish_Poland.1250 locale has no representation of `unicodeUtf8`, so the
+    # conversion will fail, but this must not be fatal. It is expected that the
+    # routine ignores updating MBCS environment (`environ` global) and carries
+    # on.
+    block:
+      const envName = unicodeUtf8 & "4"
+      discard setlocale(LC_ALL, cstring"Polish_Poland.1250")
+      putEnv(envName, "")
+      doAssert existsEnv(envName)
+      doAssert $c_wgetenv(envName.newWideCString) == ""
+      doAssert getEnv(envName) == ""