Add use of Windows Wide CRT API for env. vars

Replace the CRT API calls `getenv` and `putenv` with `_wgetenv` and
`_wputenv`, respectively. The motivation is to reliably convert
environment variables to UTF-8; the wide API is the best fit for that
because it is reliably UTF-16.

Change the hack in `lib/std/private/win_setenv.nim` by switching the
order of the Unicode and MBCS environment updates: Unicode first, MBCS
second. Because `_wgetenv`/`_wputenv` are now used, the Unicode
environment will be initialized, so it should always be updated.

Stop updating the MBCS environment with the name of `getEnv`. The MBCS
encoding and the `string` encoding are not necessarily the same.
Instead, convert UTF-16 to the current Windows code page with
`wcstombs`, and use that string to update the MBCS environment.

Fixes regression in `6b3c77e` that caused `std/envvars.getEnv` or
`std/os.getEnv` on Windows to return non-UTF-8 encoded strings.

Add tests that test environment variables with Unicode characters in
their name or value.
This commit is contained in:
Håvard Mjaavatten
2022-07-24 11:41:55 +02:00
parent 685bf944aa
commit ae876a4453
5 changed files with 223 additions and 44 deletions

View File

@@ -42,14 +42,18 @@ when not defined(nimscript):
else:
proc c_getenv(env: cstring): cstring {.
importc: "getenv", header: "<stdlib.h>".}
when defined(windows):
proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
from std/private/win_setenv import setEnvImpl
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv",
header: "<stdlib.h>".}
proc getEnvImpl(env: cstring): WideCString = c_wgetenv(env.newWideCString)
else:
proc c_getenv(env: cstring): cstring {.
importc: "getenv", header: "<stdlib.h>".}
proc c_setenv(envname: cstring, envval: cstring, overwrite: cint): cint {.importc: "setenv", header: "<stdlib.h>".}
proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
proc getEnvImpl(env: cstring): cstring = c_getenv(env)
proc getEnv*(key: string, default = ""): string {.tags: [ReadEnvEffect].} =
## Returns the value of the `environment variable`:idx: named `key`.
@@ -67,7 +71,7 @@ when not defined(nimscript):
assert getEnv("unknownEnv") == ""
assert getEnv("unknownEnv", "doesn't exist") == "doesn't exist"
let env = c_getenv(key)
let env = getEnvImpl(key)
if env == nil: return default
result = $env
@@ -83,7 +87,7 @@ when not defined(nimscript):
runnableExamples:
assert not existsEnv("unknownEnv")
return c_getenv(key) != nil
return getEnvImpl(key) != nil
proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} =
## Sets the value of the `environment variable`:idx: named `key` to `val`.

View File

@@ -57,15 +57,19 @@ when not defined(nimscript):
else:
proc c_getenv(env: cstring): cstring {.
importc: "getenv", header: "<stdlib.h>".}
when defined(windows):
proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
from std/private/win_setenv import setEnvImpl
import winlean
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv",
header: "<stdlib.h>".}
proc getEnvImpl(env: cstring): WideCString = c_wgetenv(env.newWideCString)
else:
proc c_getenv(env: cstring): cstring {.
importc: "getenv", header: "<stdlib.h>".}
proc c_setenv(envname: cstring, envval: cstring, overwrite: cint): cint {.importc: "setenv", header: "<stdlib.h>".}
proc c_unsetenv(env: cstring): cint {.importc: "unsetenv", header: "<stdlib.h>".}
proc getEnvImpl(env: cstring): cstring = c_getenv(env)
proc getEnv*(key: string, default = ""): string {.tags: [ReadEnvEffect].} =
## Returns the value of the `environment variable`:idx: named `key`.
@@ -83,7 +87,7 @@ when not defined(nimscript):
assert getEnv("unknownEnv") == ""
assert getEnv("unknownEnv", "doesn't exist") == "doesn't exist"
let env = c_getenv(key)
let env = getEnvImpl(key)
if env == nil: return default
result = $env
@@ -99,7 +103,7 @@ when not defined(nimscript):
runnableExamples:
assert not existsEnv("unknownEnv")
return c_getenv(key) != nil
return getEnvImpl(key) != nil
proc putEnv*(key, val: string) {.tags: [WriteEnvEffect].} =
## Sets the value of the `environment variable`:idx: named `key` to `val`.

View File

@@ -25,27 +25,28 @@ when not defined(windows): discard
else:
type wchar_t {.importc: "wchar_t".} = int16
proc setEnvironmentVariableA*(lpName, lpValue: cstring): int32 {.
stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableA", sideEffect.}
proc setEnvironmentVariableW*(lpName, lpValue: WideCString): int32 {.
stdcall, dynlib: "kernel32", importc: "SetEnvironmentVariableW", sideEffect.}
# same as winlean.setEnvironmentVariableA
proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
proc c_putenv(envstring: cstring): cint {.importc: "_putenv", header: "<stdlib.h>".}
proc c_wgetenv(varname: ptr wchar_t): ptr wchar_t {.importc: "_wgetenv", header: "<stdlib.h>".}
proc c_getenv(varname: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
proc c_wputenv(envstring: WideCString): cint {.importc: "_wputenv", header: "<stdlib.h>".}
proc c_wgetenv(varname: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
var errno {.importc, header: "<errno.h>".}: cint
var gWenviron {.importc: "_wenviron".}: ptr ptr wchar_t
var genviron {.importc: "_environ".}: ptr ptr char
# xxx `ptr UncheckedArray[WideCString]` did not work
proc mbstowcs(wcstr: ptr wchar_t, mbstr: cstring, count: csize_t): csize_t {.importc: "mbstowcs", header: "<stdlib.h>".}
proc wcstombs(wcstr: ptr char, mbstr: WideCString, count: csize_t): csize_t {.importc, header: "<stdlib.h>".}
# xxx cint vs errno_t?
proc setEnvImpl*(name: string, value: string, overwrite: cint): cint =
const EINVAL = cint(22)
if overwrite == 0 and c_getenv(cstring(name)) != nil: return 0
let wideName = name.newWideCString
if overwrite == 0 and c_wgetenv(wideName) != nil: return 0
if value != "":
let envstring = name & "=" & value
let e = c_putenv(cstring(envstring))
let e = c_wputenv(envstring.newWideCString)
if e != 0:
errno = EINVAL
return -1
@@ -57,40 +58,46 @@ else:
so we have to do these terrible things.
]#
let envstring = name & "= "
if c_putenv(cstring(envstring)) != 0:
if c_wputenv(newWideCString(envstring)) != 0:
errno = EINVAL
return -1
# Here lies the documentation we blatantly ignore to make this work.
var s = c_getenv(cstring(name))
s[0] = '\0'
var s = c_wgetenv(wideName)
s[0] = '\0'.Utf16Char
#[
This would result in a double null termination, which normally signifies the
end of the environment variable list, so we stick a completely empty
environment variable into the list instead.
]#
s = c_getenv(cstring(name))
s[1] = '='
s = c_wgetenv(wideName)
s[1] = '='.Utf16Char
#[
If gWenviron is null, the wide environment has not been initialized
If genviron is null, the MBCS environment has not been initialized
yet, and we don't need to try to update it. We have to do this otherwise
we'd be forcing the initialization and maintenance of the wide environment
we'd be forcing the initialization and maintenance of the MBCS environment
even though it's never actually used in most programs.
]#
if gWenviron != nil:
# var buf: array[MAX_ENV + 1, WideCString]
let requiredSize = mbstowcs(nil, cstring(name), 0).int
var buf = newSeq[Utf16Char](requiredSize + 1)
let buf2 = cast[ptr wchar_t](buf[0].addr)
if mbstowcs(buf2, cstring(name), csize_t(requiredSize + 1)) == csize_t(high(uint)):
errno = EINVAL
return -1
var ptrToEnv = cast[WideCString](c_wgetenv(buf2))
ptrToEnv[0] = '\0'.Utf16Char
ptrToEnv = cast[WideCString](c_wgetenv(buf2))
ptrToEnv[1] = '='.Utf16Char
if genviron != nil:
# Wrapping in try-except block, because first `wcstombs` fails with a
# "RangeDefect" if the current codepage cannot represent a character in
# `wideName`. In this case skip updating MBCS environment.
try:
let requiredSize = wcstombs(nil, wideName, 0).int
var buf = newSeq[char](requiredSize + 1)
let buf2 = buf[0].addr
if wcstombs(buf2, wideName, csize_t(requiredSize + 1)) == csize_t(high(uint)):
errno = EINVAL
return -1
var ptrToEnv = c_getenv(buf2)
ptrToEnv[0] = '\0'
ptrToEnv = c_getenv(buf2)
ptrToEnv[1] = '='
except RangeDefect:
discard
# And now, we have to update the outer environment to have a proper empty value.
if setEnvironmentVariableA(cstring(name), cstring(value)) == 0:
if setEnvironmentVariableW(wideName, value.newWideCString) == 0:
errno = EINVAL
return -1
return 0

View File

@@ -8,9 +8,12 @@ import std/envvars
from std/sequtils import toSeq
import stdtest/testutils
# "LATIN CAPITAL LETTER AE" in UTF-8 (0xc386)
const unicodeUtf8 = "\xc3\x86"
template main =
block: # delEnv, existsEnv, getEnv, envPairs
for val in ["val", ""]: # ensures empty val works too
for val in ["val", "", unicodeUtf8]: # ensures empty val works too
const key = "NIM_TESTS_TOSENV_KEY"
doAssert not existsEnv(key)
@@ -44,9 +47,12 @@ template main =
main()
proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
proc c_wgetenv(env: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
proc c_wputenv(env: WideCString): int32 {.importc: "_wputenv", header: "<stdlib.h>".}
when not defined(js) and not defined(nimscript):
block: # bug #18533
proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
var thr: Thread[void]
proc threadFunc {.thread.} = putEnv("foo", "fooVal2")
@@ -54,6 +60,82 @@ when not defined(js) and not defined(nimscript):
doAssert getEnv("foo") == "fooVal1"
createThread(thr, threadFunc)
joinThreads(thr)
doAssert getEnv("foo") == $c_getenv("foo")
doAssert getEnv("foo") == $c_wgetenv("foo".newWideCString)
doAssertRaises(OSError): delEnv("foo=bar")
when defined(windows):
const
LC_ALL = 0
unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding
proc setlocale(category: cint, locale: cstring): cstring {.importc, header: "<locale.h>".}
# Set locale required to represent `unicodeAnsi`
discard setlocale(LC_ALL, cstring"English_United States.1252")
block: # Feature #xxx
# These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode
# characters correctly. This means that module X in the process calling the
# CRT environment variable API will get the correct string. Raw CRT API
# calls below represent module X.
# Getting an env. var. with unicode characters returns the correct UTF-8
# encoded string.
block:
const envName = "twin_envvars1"
doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
doAssert existsEnv(envName)
doAssert getEnv(envName) == unicodeUtf8
# Putting an env. var. with unicode characters gives the correct UTF-16
# encoded string from low-level routine.
block:
const envName = "twin_envvars2"
putEnv(envName, unicodeUtf8)
doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
doAssert $c_getenv(envName) == unicodeAnsi
# Env. name containing Unicode characters is retrieved correctly
block:
const envName = unicodeUtf8 & "1"
doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
doAssert existsEnv(envName)
doAssert getEnv(envName) == unicodeUtf8
# Env. name containing Unicode characters is set correctly
block:
const
envName = unicodeUtf8 & "2"
envNameAnsi = unicodeAnsi & "2"
putEnv(envName, unicodeUtf8)
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi
# Env. name containing Unicode characters and empty value is set correctly
block:
const
envName = unicodeUtf8 & "3"
envNameAnsi = unicodeAnsi & "3"
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert $c_getenv(envNameAnsi.cstring) == ""
# Env. name containing Unicode characters and empty value is set correctly;
# and, if the env. name characters cannot be represented in the codepage,
# don't raise an error.
#
# `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The
# Polish_Poland.1250 locale has no representation of `unicodeUtf8`, so the
# conversion will fail, but this must not be fatal. It is expected that the
# routine ignores updating MBCS environment (`environ` global) and carries
# on.
block:
const envName = unicodeUtf8 & "4"
discard setlocale(LC_ALL, cstring"Polish_Poland.1250")
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert getEnv(envName) == ""

View File

@@ -8,9 +8,12 @@ import std/os
from std/sequtils import toSeq
import stdtest/testutils
# "LATIN CAPITAL LETTER AE" in UTF-8 (0xc386)
const unicodeUtf8 = "\xc3\x86"
template main =
block: # delEnv, existsEnv, getEnv, envPairs
for val in ["val", ""]: # ensures empty val works too
for val in ["val", "", unicodeUtf8]: # ensures empty val works too
const key = "NIM_TESTS_TOSENV_KEY"
doAssert not existsEnv(key)
@@ -45,9 +48,12 @@ template main =
static: main()
main()
proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
proc c_wgetenv(env: WideCString): WideCString {.importc: "_wgetenv", header: "<stdlib.h>".}
proc c_wputenv(env: WideCString): int32 {.importc: "_wputenv", header: "<stdlib.h>".}
when not defined(js) and not defined(nimscript):
block: # bug #18533
proc c_getenv(env: cstring): cstring {.importc: "getenv", header: "<stdlib.h>".}
var thr: Thread[void]
proc threadFunc {.thread.} = putEnv("foo", "fooVal2")
@@ -55,6 +61,82 @@ when not defined(js) and not defined(nimscript):
doAssert getEnv("foo") == "fooVal1"
createThread(thr, threadFunc)
joinThreads(thr)
doAssert getEnv("foo") == $c_getenv("foo")
doAssert getEnv("foo") == $c_wgetenv("foo".newWideCString)
doAssertRaises(OSError): delEnv("foo=bar")
when defined(windows):
const
LC_ALL = 0
unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding
proc setlocale(category: cint, locale: cstring): cstring {.importc, header: "<locale.h>".}
# Set locale required to represent `unicodeAnsi`
discard setlocale(LC_ALL, cstring"English_United States.1252")
block: # Feature #xxx
# These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode
# characters correctly. This means that module X in the process calling the
# CRT environment variable API will get the correct string. Raw CRT API
# calls below represent module X.
# Getting an env. var. with unicode characters returns the correct UTF-8
# encoded string.
block:
const envName = "twin_envvars1"
doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
doAssert existsEnv(envName)
doAssert getEnv(envName) == unicodeUtf8
# Putting an env. var. with unicode characters gives the correct UTF-16
# encoded string from low-level routine.
block:
const envName = "twin_envvars2"
putEnv(envName, unicodeUtf8)
doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
doAssert $c_getenv(envName) == unicodeAnsi
# Env. name containing Unicode characters is retrieved correctly
block:
const envName = unicodeUtf8 & "1"
doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
doAssert existsEnv(envName)
doAssert getEnv(envName) == unicodeUtf8
# Env. name containing Unicode characters is set correctly
block:
const
envName = unicodeUtf8 & "2"
envNameAnsi = unicodeAnsi & "2"
putEnv(envName, unicodeUtf8)
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi
# Env. name containing Unicode characters and empty value is set correctly
block:
const
envName = unicodeUtf8 & "3"
envNameAnsi = unicodeAnsi & "3"
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert $c_getenv(envNameAnsi.cstring) == ""
# Env. name containing Unicode characters and empty value is set correctly;
# and, if the env. name characters cannot be represented in the codepage,
# don't raise an error.
#
# `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The
# Polish_Poland.1250 locale has no representation of `unicodeUtf8`, so the
# conversion will fail, but this must not be fatal. It is expected that the
# routine ignores updating MBCS environment (`environ` global) and carries
# on.
block:
const envName = unicodeUtf8 & "4"
discard setlocale(LC_ALL, cstring"Polish_Poland.1250")
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert getEnv(envName) == ""