Files
Nim/lib/std/private/win_setenv.nim
havardjohn f4bbf3bf0b Add use of Windows Wide CRT API for env. vars (#20084)
* Add use of Windows Wide CRT API for env. vars

Replaces use of CRT API `getenv` and `putenv` with respectively
`_wgetenv` and `_wputenv`. Motivation is to reliably convert environment
variables to UTF-8, and the wide API is best there, because it's
reliably UTF-16.

Changed the hack in `lib/std/private/win_setenv.nim` by switching the
order of the Unicode and MBCS environment update; Unicode first, MBCS
second. Because `_wgetenv`/`_wputenv` is now used, the Unicode
environment will be initialized, so it should always be updated.

Stop updating MBCS environment with the name of `getEnv`. It's not
necessarily true that the MBCS encoding and the `string` encoding are the
same. Instead, convert UTF-16 to the current Windows code page with
`wcstombs`, and use that string to update MBCS.

Fixes regression in `6b3c77e` that caused `std/envvars.getEnv` or
`std/os.getEnv` on Windows to return non-UTF-8 encoded strings.

Add tests that test environment variables with Unicode characters in
their name or value.

* Fix test issues

Fixes

* `nim cpp` didn't compile the tests
* Nimscript import of `tosenv.nim` from `test_nimscript.nims` failed
  with "cannot importc"

* Fix missing error check on `wcstombs`

* Fix ANSI testing errors

* Separate ANSI-related testing into its own tests, and only execute
  them if the running process has a specific code page
  * Setting locale with `setlocale` was not reliable and didn't work on
    certain machines
* Add handling of a "no character representation" error in second
  `wcstombs` call

* tests/newruntime_misc: Increment allocCount

Increments overall allocations in `tnewruntime_misc` test. This is
because `getEnv` now does an additional allocation: allocation of the
UTF-16 string used as parameter to `c_wgetenv`.

* Revert "tests/newruntime_misc: Increment allocCount"

This reverts commit 4d4fe8bd3e.

* tests/newruntime_misc: Increment allocCount on Windows

Increments overall allocations in `tnewruntime_misc` test for Windows.
This is because `getEnv` on Windows now does an additional allocation:
allocation of the UTF-16 string used as parameter to `c_wgetenv`.

* Refactor, adding suggestions from code review

Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>

* Document, adding suggestions

Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>

Co-authored-by: ringabout <43030857+ringabout@users.noreply.github.com>
Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
2022-08-20 04:30:11 -04:00

104 lines
4.0 KiB
Nim

#[
Copyright (c) Facebook, Inc. and its affiliates.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Adapted `setenv` from https://github.com/facebook/folly/blob/master/folly/portability/Stdlib.cpp
translated from C to nim.
]#
#[
Introduced in https://github.com/facebook/folly/commit/5d8ca09a3f96afefb44e35808f03651a096ab9c7
TODO:
check errno_t vs cint
]#
# Everything below is compiled only when targeting Windows; on any other
# platform this module contributes nothing.
when not defined(windows): discard
else:
# C `wchar_t`, mapped here to a 16-bit integer.
type wchar_t {.importc: "wchar_t".} = int16
# Binding to `SetEnvironmentVariableW` from kernel32 (stdcall, resolved via
# `dynlib`). `setEnvImpl` treats a return value of 0 as failure.
proc setEnvironmentVariableW*(lpName, lpValue: WideCString): int32 {.
stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableW", sideEffect.}
# same as winlean.setEnvironmentVariableA
# C runtime bindings from <stdlib.h>: the narrow `getenv` plus the wide
# `_wputenv` / `_wgetenv` pair.
proc c_getenv(varname: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
proc c_wputenv(envstring: WideCString): cint {.importc: "_wputenv", header: "<stdlib.h>".}
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
# C `errno` from <errno.h>; `setEnvImpl` assigns EINVAL to it on failure.
var errno {.importc, header: "<errno.h>".}: cint
# The C runtime's `_environ` pointer. `setEnvImpl` checks it against nil to
# decide whether the MBCS environment needs to be patched as well.
var genviron {.importc: "_environ".}: ptr ptr char
# xxx `ptr UncheckedArray[WideCString]` did not work
# Binding to C `wcstombs`. NOTE: the parameter names are swapped relative to
# the C prototype: as the call sites in `setEnvImpl` show, the first argument
# (`wcstr`) is the multibyte output buffer (or nil to query the size) and the
# second (`mbstr`) is the wide-character source string.
proc wcstombs(wcstr: ptr char, mbstr: WideCString, count: csize_t): csize_t {.importc, header: "<stdlib.h>".}
# xxx cint vs errno_t?
proc setEnvImpl*(name: string, value: string, overwrite: cint): cint =
  ## Sets the environment variable `name` to `value` through the wide C
  ## runtime API, mirroring POSIX `setenv`.
  ##
  ## * `overwrite == 0`: if the variable already exists it is left untouched
  ##   and 0 is returned.
  ## * Non-empty `value`: a single `_wputenv("name=value")` call.
  ## * Empty `value`: `_wputenv` would delete the variable, so a placeholder
  ##   value is stored first and then patched in place to an empty string,
  ##   in the wide CRT environment, in the MBCS CRT environment (only if it
  ##   has been initialized) and finally in the process environment via
  ##   `SetEnvironmentVariableW`.
  ##
  ## Returns 0 on success; on failure returns -1 and sets `errno` to EINVAL.
  const EINVAL = cint(22)
  let wideName = newWideCString(name)
  if overwrite == 0 and c_wgetenv(wideName) != nil:
    return 0

  if value != "":
    let envstring = name & "=" & value
    let e = c_wputenv(newWideCString(envstring))
    if e != 0:
      errno = EINVAL
      return -1
    return 0
  #[
  We are trying to set the value to an empty string, but `_putenv` deletes
  entries if the value is an empty string, and just calling
  SetEnvironmentVariableA doesn't update `_environ`,
  so we have to do these terrible things.
  ]#
  # The placeholder value must be TWO characters long: the code below
  # overwrites both s[0] and s[1], and s[2] has to remain the original NUL
  # terminator. A one-character placeholder would make the s[1] write clobber
  # the terminator itself.
  let envstring = name & "=  "
  if c_wputenv(newWideCString(envstring)) != 0:
    errno = EINVAL
    return -1
  # Here lies the documentation we blatantly ignore to make this work.
  var s = c_wgetenv(wideName)
  if s == nil:
    # The variable we just stored cannot be found; report failure instead of
    # writing through a nil pointer.
    errno = EINVAL
    return -1
  s[0] = Utf16Char('\0')
  #[
  This would result in a double null termination, which normally signifies the
  end of the environment variable list, so we stick a completely empty
  environment variable into the list instead.
  ]#
  s = c_wgetenv(wideName)
  if s == nil:
    errno = EINVAL
    return -1
  s[1] = Utf16Char('=')
  #[
  If genviron is null, the MBCS environment has not been initialized
  yet, and we don't need to try to update it. We have to do this otherwise
  we'd be forcing the initialization and maintenance of the MBCS environment
  even though it's never actually used in most programs.
  ]#
  if genviron != nil:
    # wcstombs returns `high(csize_t)` if any characters cannot be represented
    # in the current codepage. Skip updating MBCS environment in this case.
    # For some reason, second `wcstombs` can find non-convertible characters
    # that the first `wcstombs` cannot.
    let requiredSizeS = wcstombs(nil, wideName, 0)
    if requiredSizeS != high(csize_t):
      let requiredSize = requiredSizeS.int
      # +1 leaves room for the terminating NUL written by `wcstombs`.
      var buf = newSeq[char](requiredSize + 1)
      let buf2 = buf[0].addr
      if wcstombs(buf2, wideName, csize_t(requiredSize + 1)) != high(csize_t):
        # Apply the same in-place patch to the narrow (MBCS) copy. If the
        # narrow lookup does not find the variable, skip the update rather
        # than dereferencing nil.
        var ptrToEnv = c_getenv(buf2)
        if ptrToEnv != nil:
          ptrToEnv[0] = '\0'
          ptrToEnv = c_getenv(buf2)
          if ptrToEnv != nil:
            ptrToEnv[1] = '='

  # And now, we have to update the outer environment to have a proper empty value.
  if setEnvironmentVariableW(wideName, value.newWideCString) == 0:
    errno = EINVAL
    return -1
  return 0