Add use of Windows Wide CRT API for env. vars (#20084)

* Add use of Windows Wide CRT API for env. vars

Replaces use of CRT API `getenv` and `putenv` with respectively
`_wgetenv` and `_wputenv`. Motivation is to reliably convert environment
variables to UTF-8, and the wide API is best there, because it's
reliably UTF-16.

Changed the hack in `lib/std/private/win_setenv.nim` by switching the
order of the Unicode and MBCS environment update; Unicode first, MBCS
second. Because `_wgetenv`/`_wputenv` is now used, the Unicode
environment will be initialized, so it should always be updated.

Stop updating MBCS environment with the name of `getEnv`. It's not
necessarily true that MBCS encoding and the `string` encoding is the
same. Instead convert UTF-16 to current Windows code page with
`wcstombs`, and use that string to update MBCS.

Fixes regression in `6b3c77e` that caused `std/envvars.getEnv` or
`std/os.getEnv` on Windows to return non-UTF-8 encoded strings.

Add tests that test environment variables with Unicode characters in
their name or value.

* Fix test issues

Fixes

* `nim cpp` didn't compile the tests
* Nimscript import of `tosenv.nim` from `test_nimscript.nims` failed
  with "cannot importc"

* Fix missing error check on `wcstombs`

* Fix ANSI testing errors

* Separate ANSI-related testing to their own tests, and only executing
  them if running process has a specific code page
  * Setting locale with `setlocale` was not reliable and didn't work on
    certain machines
* Add handling of a "no character representation" error in second
  `wcstombs` call

* tests/newruntime_misc: Increment allocCount

Increments overall allocations in `tnewruntime_misc` test. This is
because `getEnv` now does an additional allocation: allocation of the
UTF-16 string used as parameter to `c_wgetenv`.

* Revert "tests/newruntime_misc: Increment allocCount"

This reverts commit 4d4fe8bd3e.

* tests/newruntime_misc: Increment allocCount on Windows

Increments overall allocations in `tnewruntime_misc` test for Windows.
This is because `getEnv` on Windows now does an additional allocation:
allocation of the UTF-16 string used as parameter to `c_wgetenv`.

* Refactor, adding suggestions from code review

Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>

* Document, adding suggestions

Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>

Co-authored-by: ringabout <43030857+ringabout@users.noreply.github.com>
Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
This commit is contained in:
havardjohn
2022-08-20 10:30:11 +02:00
committed by GitHub
parent 641381e3d4
commit f4bbf3bf0b
6 changed files with 263 additions and 45 deletions

View File

@@ -57,15 +57,19 @@ when not defined(nimscript):
else:
proc c_getenv(env: cstring): cstring {.
importc: "getenv", header: "<stdlib.h>".}
when defined(windows):
proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
from std/private/win_setenv import setEnvImpl
import winlean
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv",
header: "<stdlib.h>".}
proc getEnvImpl(env: cstring): WideCString = c_wgetenv(env.newWideCString)
else:
proc c_getenv(env: cstring): cstring {.
importc: "getenv", header: "<stdlib.h>".}
proc c_setenv(envname: cstring, envval: cstring, overwrite: cint): cint {.importc: "setenv", header: "<stdlib.h>".}
proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
proc getEnvImpl(env: cstring): cstring = c_getenv(env)
proc getEnv*(key: string, default = ""): string {.tags: [ReadEnvEffect].} =
## Returns the value of the `environment variable`:idx: named `key`.
@@ -83,7 +87,7 @@ when not defined(nimscript):
assert getEnv("unknownEnv") == ""
assert getEnv("unknownEnv", "doesn't exist") == "doesn't exist"
let env = c_getenv(key)
let env = getEnvImpl(key)
if env == nil: return default
result = $env
@@ -99,7 +103,7 @@ when not defined(nimscript):
runnableExamples:
assert not existsEnv("unknownEnv")
return c_getenv(key) != nil
return getEnvImpl(key) != nil
proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} =
## Sets the value of the `environment variable`:idx: named `key` to `val`.

View File

@@ -25,27 +25,30 @@ when not defined(windows): discard
else:
type wchar_t {.importc: "wchar_t".} = int16
proc setEnvironmentVariableA*(lpName, lpValue: cstring): int32 {.
stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableA", sideEffect.}
proc setEnvironmentVariableW*(lpName, lpValue: WideCString): int32 {.
stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableW", sideEffect.}
# same as winlean.setEnvironmentVariableA
proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
proc c_wgetenv(varname: ptr wchar_t): ptr wchar_t {.importc: "_wgetenv", header: "<stdlib.h>".}
proc c_getenv(varname: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
proc c_wputenv(envstring: WideCString): cint {.importc: "_wputenv", header: "<stdlib.h>".}
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
var errno {.importc, header: "<errno.h>".}: cint
var gWenviron {.importc: "_wenviron".}: ptr ptr wchar_t
var genviron {.importc: "_environ".}: ptr ptr char
# xxx `ptr UncheckedArray[WideCString]` did not work
proc mbstowcs(wcstr: ptr wchar_t, mbstr: cstring, count: csize_t): csize_t {.importc: "mbstowcs", header: "<stdlib.h>".}
proc wcstombs(wcstr: ptr char, mbstr: WideCString, count: csize_t): csize_t {.importc, header: "<stdlib.h>".}
# xxx cint vs errno_t?
proc setEnvImpl*(name: string, value: string, overwrite: cint): cint =
const EINVAL = cint(22)
if overwrite == 0 and c_getenv(cstring(name)) != nil: return 0
let wideName = newWideCString(name)
if overwrite == 0 and c_wgetenv(wideName) != nil:
return 0
if value != "":
let envstring = name & "=" & value
let e = c_putenv(cstring(envstring))
let e = c_wputenv(newWideCString(envstring))
if e != 0:
errno = EINVAL
return -1
@@ -57,40 +60,44 @@ else:
so we have to do these terrible things.
]#
let envstring = name & "= "
if c_putenv(cstring(envstring)) != 0:
if c_wputenv(newWideCString(envstring)) != 0:
errno = EINVAL
return -1
# Here lies the documentation we blatently ignore to make this work.
var s = c_getenv(cstring(name))
s[0] = '\0'
var s = c_wgetenv(wideName)
s[0] = Utf16Char('\0')
#[
This would result in a double null termination, which normally signifies the
end of the environment variable list, so we stick a completely empty
environment variable into the list instead.
]#
s = c_getenv(cstring(name))
s[1] = '='
s = c_wgetenv(wideName)
s[1] = Utf16Char('=')
#[
If gWenviron is null, the wide environment has not been initialized
If genviron is null, the MBCS environment has not been initialized
yet, and we don't need to try to update it. We have to do this otherwise
we'd be forcing the initialization and maintenance of the wide environment
we'd be forcing the initialization and maintenance of the MBCS environment
even though it's never actually used in most programs.
]#
if gWenviron != nil:
# var buf: array[MAX_ENV + 1, WideCString]
let requiredSize = mbstowcs(nil, cstring(name), 0).int
var buf = newSeq[Utf16Char](requiredSize + 1)
let buf2 = cast[ptr wchar_t](buf[0].addr)
if mbstowcs(buf2, cstring(name), csize_t(requiredSize + 1)) == csize_t(high(uint)):
errno = EINVAL
return -1
var ptrToEnv = cast[WideCString](c_wgetenv(buf2))
ptrToEnv[0] = '\0'.Utf16Char
ptrToEnv = cast[WideCString](c_wgetenv(buf2))
ptrToEnv[1] = '='.Utf16Char
if genviron != nil:
# wcstombs returns `high(csize_t)` if any characters cannot be represented
# in the current codepage. Skip updating MBCS environment in this case.
# For some reason, second `wcstombs` can find non-convertible characters
# that the first `wcstombs` cannot.
let requiredSizeS = wcstombs(nil, wideName, 0)
if requiredSizeS != high(csize_t):
let requiredSize = requiredSizeS.int
var buf = newSeq[char](requiredSize + 1)
let buf2 = buf[0].addr
if wcstombs(buf2, wideName, csize_t(requiredSize + 1)) != high(csize_t):
var ptrToEnv = c_getenv(buf2)
ptrToEnv[0] = '\0'
ptrToEnv = c_getenv(buf2)
ptrToEnv[1] = '='
# And now, we have to update the outer environment to have a proper empty value.
if setEnvironmentVariableA(cstring(name), cstring(value)) == 0:
if setEnvironmentVariableW(wideName, value.newWideCString) == 0:
errno = EINVAL
return -1
return 0