mirror of
https://github.com/nim-lang/Nim.git
synced 2026-04-22 23:35:22 +00:00
Add use of Windows Wide CRT API for env. vars (#20084)
* Add use of Windows Wide CRT API for env. vars
Replaces use of CRT API `getenv` and `putenv` with respectively
`_wgetenv` and `_wputenv`. Motivation is to reliably convert environment
variables to UTF-8, and the wide API is best there, because it's
reliably UTF-16.
Changed the hack in `lib/std/private/win_setenv.nim` by switching the
order of the Unicode and MBCS environment update; Unicode first, MBCS
second. Because `_wgetenv`/`_wputenv` is now used, the Unicode
environment will be initialized, so it should always be updated.
Stop updating MBCS environment with the name of `getEnv`. It's not
necessarily true that MBCS encoding and the `string` encoding is the
same. Instead convert UTF-16 to current Windows code page with
`wcstombs`, and use that string to update MBCS.
Fixes regression in `6b3c77e` that caused `std/envvars.getEnv` or
`std/os.getEnv` on Windows to return non-UTF-8 encoded strings.
Add tests that test environment variables with Unicode characters in
their name or value.
* Fix test issues
Fixes
* `nim cpp` didn't compile the tests
* Nimscript import of `tosenv.nim` from `test_nimscript.nims` failed
with "cannot importc"
* Fix missing error check on `wcstombs`
* Fix ANSI testing errors
* Separate ANSI-related testing to their own tests, and only executing
them if running process has a specific code page
* Setting locale with `setlocale` was not reliable and didn't work on
certain machines
* Add handling of a "no character representation" error in second
`wcstombs` call
* tests/newruntime_misc: Increment allocCount
Increments overall allocations in `tnewruntime_misc` test. This is
because `getEnv` now does an additional allocation: allocation of the
UTF-16 string used as parameter to `c_wgetenv`.
* Revert "tests/newruntime_misc: Increment allocCount"
This reverts commit 4d4fe8bd3e.
* tests/newruntime_misc: Increment allocCount on Windows
Increments overall allocations in `tnewruntime_misc` test for Windows.
This is because `getEnv` on Windows now does an additional allocation:
allocation of the UTF-16 string used as parameter to `c_wgetenv`.
* Refactor, adding suggestions from code review
Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
* Document, adding suggestions
Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
Co-authored-by: ringabout <43030857+ringabout@users.noreply.github.com>
Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
This commit is contained in:
@@ -57,15 +57,19 @@ when not defined(nimscript):
|
||||
|
||||
else:
|
||||
|
||||
proc c_getenv(env: cstring): cstring {.
|
||||
importc: "getenv", header: "<stdlib.h>".}
|
||||
when defined(windows):
|
||||
proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
|
||||
from std/private/win_setenv import setEnvImpl
|
||||
import winlean
|
||||
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv",
|
||||
header: "<stdlib.h>".}
|
||||
proc getEnvImpl(env: cstring): WideCString = c_wgetenv(env.newWideCString)
|
||||
else:
|
||||
proc c_getenv(env: cstring): cstring {.
|
||||
importc: "getenv", header: "<stdlib.h>".}
|
||||
proc c_setenv(envname: cstring, envval: cstring, overwrite: cint): cint {.importc: "setenv", header: "<stdlib.h>".}
|
||||
proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
|
||||
proc getEnvImpl(env: cstring): cstring = c_getenv(env)
|
||||
|
||||
proc getEnv*(key: string, default = ""): string {.tags: [ReadEnvEffect].} =
|
||||
## Returns the value of the `environment variable`:idx: named `key`.
|
||||
@@ -83,7 +87,7 @@ when not defined(nimscript):
|
||||
assert getEnv("unknownEnv") == ""
|
||||
assert getEnv("unknownEnv", "doesn't exist") == "doesn't exist"
|
||||
|
||||
let env = c_getenv(key)
|
||||
let env = getEnvImpl(key)
|
||||
if env == nil: return default
|
||||
result = $env
|
||||
|
||||
@@ -99,7 +103,7 @@ when not defined(nimscript):
|
||||
runnableExamples:
|
||||
assert not existsEnv("unknownEnv")
|
||||
|
||||
return c_getenv(key) != nil
|
||||
return getEnvImpl(key) != nil
|
||||
|
||||
proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} =
|
||||
## Sets the value of the `environment variable`:idx: named `key` to `val`.
|
||||
|
||||
@@ -25,27 +25,30 @@ when not defined(windows): discard
|
||||
else:
|
||||
type wchar_t {.importc: "wchar_t".} = int16
|
||||
|
||||
proc setEnvironmentVariableA*(lpName, lpValue: cstring): int32 {.
|
||||
stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableA", sideEffect.}
|
||||
proc setEnvironmentVariableW*(lpName, lpValue: WideCString): int32 {.
|
||||
stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableW", sideEffect.}
|
||||
# same as winlean.setEnvironmentVariableA
|
||||
|
||||
proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
|
||||
proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
|
||||
proc c_wgetenv(varname: ptr wchar_t): ptr wchar_t {.importc: "_wgetenv", header: "<stdlib.h>".}
|
||||
proc c_getenv(varname: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
|
||||
proc c_wputenv(envstring: WideCString): cint {.importc: "_wputenv", header: "<stdlib.h>".}
|
||||
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
|
||||
|
||||
var errno {.importc, header: "<errno.h>".}: cint
|
||||
var gWenviron {.importc: "_wenviron".}: ptr ptr wchar_t
|
||||
var genviron {.importc: "_environ".}: ptr ptr char
|
||||
# xxx `ptr UncheckedArray[WideCString]` did not work
|
||||
|
||||
proc mbstowcs(wcstr: ptr wchar_t, mbstr: cstring, count: csize_t): csize_t {.importc: "mbstowcs", header: "<stdlib.h>".}
|
||||
proc wcstombs(wcstr: ptr char, mbstr: WideCString, count: csize_t): csize_t {.importc, header: "<stdlib.h>".}
|
||||
# xxx cint vs errno_t?
|
||||
|
||||
proc setEnvImpl*(name: string, value: string, overwrite: cint): cint =
|
||||
const EINVAL = cint(22)
|
||||
if overwrite == 0 and c_getenv(cstring(name)) != nil: return 0
|
||||
let wideName = newWideCString(name)
|
||||
if overwrite == 0 and c_wgetenv(wideName) != nil:
|
||||
return 0
|
||||
|
||||
if value != "":
|
||||
let envstring = name & "=" & value
|
||||
let e = c_putenv(cstring(envstring))
|
||||
let e = c_wputenv(newWideCString(envstring))
|
||||
if e != 0:
|
||||
errno = EINVAL
|
||||
return -1
|
||||
@@ -57,40 +60,44 @@ else:
|
||||
so we have to do these terrible things.
|
||||
]#
|
||||
let envstring = name & "= "
|
||||
if c_putenv(cstring(envstring)) != 0:
|
||||
if c_wputenv(newWideCString(envstring)) != 0:
|
||||
errno = EINVAL
|
||||
return -1
|
||||
# Here lies the documentation we blatently ignore to make this work.
|
||||
var s = c_getenv(cstring(name))
|
||||
s[0] = '\0'
|
||||
var s = c_wgetenv(wideName)
|
||||
s[0] = Utf16Char('\0')
|
||||
#[
|
||||
This would result in a double null termination, which normally signifies the
|
||||
end of the environment variable list, so we stick a completely empty
|
||||
environment variable into the list instead.
|
||||
]#
|
||||
s = c_getenv(cstring(name))
|
||||
s[1] = '='
|
||||
s = c_wgetenv(wideName)
|
||||
s[1] = Utf16Char('=')
|
||||
#[
|
||||
If gWenviron is null, the wide environment has not been initialized
|
||||
If genviron is null, the MBCS environment has not been initialized
|
||||
yet, and we don't need to try to update it. We have to do this otherwise
|
||||
we'd be forcing the initialization and maintenance of the wide environment
|
||||
we'd be forcing the initialization and maintenance of the MBCS environment
|
||||
even though it's never actually used in most programs.
|
||||
]#
|
||||
if gWenviron != nil:
|
||||
# var buf: array[MAX_ENV + 1, WideCString]
|
||||
let requiredSize = mbstowcs(nil, cstring(name), 0).int
|
||||
var buf = newSeq[Utf16Char](requiredSize + 1)
|
||||
let buf2 = cast[ptr wchar_t](buf[0].addr)
|
||||
if mbstowcs(buf2, cstring(name), csize_t(requiredSize + 1)) == csize_t(high(uint)):
|
||||
errno = EINVAL
|
||||
return -1
|
||||
var ptrToEnv = cast[WideCString](c_wgetenv(buf2))
|
||||
ptrToEnv[0] = '\0'.Utf16Char
|
||||
ptrToEnv = cast[WideCString](c_wgetenv(buf2))
|
||||
ptrToEnv[1] = '='.Utf16Char
|
||||
if genviron != nil:
|
||||
|
||||
# wcstombs returns `high(csize_t)` if any characters cannot be represented
|
||||
# in the current codepage. Skip updating MBCS environment in this case.
|
||||
# For some reason, second `wcstombs` can find non-convertible characters
|
||||
# that the first `wcstombs` cannot.
|
||||
let requiredSizeS = wcstombs(nil, wideName, 0)
|
||||
if requiredSizeS != high(csize_t):
|
||||
let requiredSize = requiredSizeS.int
|
||||
var buf = newSeq[char](requiredSize + 1)
|
||||
let buf2 = buf[0].addr
|
||||
if wcstombs(buf2, wideName, csize_t(requiredSize + 1)) != high(csize_t):
|
||||
var ptrToEnv = c_getenv(buf2)
|
||||
ptrToEnv[0] = '\0'
|
||||
ptrToEnv = c_getenv(buf2)
|
||||
ptrToEnv[1] = '='
|
||||
|
||||
# And now, we have to update the outer environment to have a proper empty value.
|
||||
if setEnvironmentVariableA(cstring(name), cstring(value)) == 0:
|
||||
if setEnvironmentVariableW(wideName, value.newWideCString) == 0:
|
||||
errno = EINVAL
|
||||
return -1
|
||||
return 0
|
||||
|
||||
Reference in New Issue
Block a user