Files
Nim/tests/stdlib/tenvvars.nim
havardjohn f4bbf3bf0b Add use of Windows Wide CRT API for env. vars (#20084)
* Add use of Windows Wide CRT API for env. vars

Replaces use of CRT API `getenv` and `putenv` with respectively
`_wgetenv` and `_wputenv`. Motivation is to reliably convert environment
variables to UTF-8, and the wide API is best there, because it's
reliably UTF-16.

Changed the hack in `lib/std/private/win_setenv.nim` by switching the
order of the Unicode and MBCS environment updates: Unicode first, MBCS
second. Because `_wgetenv`/`_wputenv` are now used, the Unicode
environment will be initialized, so it should always be updated.

Stop updating the MBCS environment with the name of `getEnv`. It's not
necessarily true that the MBCS encoding and the `string` encoding are the
same. Instead, convert UTF-16 to the current Windows code page with
`wcstombs`, and use that string to update MBCS.

Fixes regression in `6b3c77e` that caused `std/envvars.getEnv` or
`std/os.getEnv` on Windows to return non-UTF-8 encoded strings.

Add tests that test environment variables with Unicode characters in
their name or value.

* Fix test issues

Fixes

* `nim cpp` didn't compile the tests
* Nimscript import of `tosenv.nim` from `test_nimscript.nims` failed
  with "cannot importc"

* Fix missing error check on `wcstombs`

* Fix ANSI testing errors

* Separate ANSI-related testing to their own tests, and only executing
  them if running process has a specific code page
  * Setting locale with `setlocale` was not reliable and didn't work on
    certain machines
* Add handling of a "no character representation" error in second
  `wcstombs` call

* tests/newruntime_misc: Increment allocCount

Increments overall allocations in `tnewruntime_misc` test. This is
because `getEnv` now does an additional allocation: allocation of the
UTF-16 string used as parameter to `c_wgetenv`.

* Revert "tests/newruntime_misc: Increment allocCount"

This reverts commit 4d4fe8bd3e.

* tests/newruntime_misc: Increment allocCount on Windows

Increments overall allocations in `tnewruntime_misc` test for Windows.
This is because `getEnv` on Windows now does an additional allocation:
allocation of the UTF-16 string used as parameter to `c_wgetenv`.

* Refactor, adding suggestions from code review

Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>

* Document, adding suggestions

Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>

Co-authored-by: ringabout <43030857+ringabout@users.noreply.github.com>
Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
2022-08-20 04:30:11 -04:00

157 lines
5.5 KiB
Nim

# Test spec carried in a discarded string literal (presumably parsed by the
# Nim test runner -- confirm). It requests `--threads:on`, which the `Thread`
# based test further down relies on, marks the file as not joinable, and
# lists the c, js and cpp targets.
discard """
matrix: "--threads:on"
joinable: false
targets: "c js cpp"
"""
import std/envvars
from std/sequtils import toSeq
import stdtest/testutils
# "LATIN CAPITAL LETTER AE" (U+00C6) encoded as UTF-8: bytes 0xC3 0x86.
const unicodeUtf8 = "\xc3\x86"

template main =
  # Round-trips a scratch variable through `putEnv`, `getEnv`, `existsEnv`,
  # `delEnv` and `envPairs`, then checks `getEnv` defaults and the handling
  # of invalid variable names.
  block: # delEnv, existsEnv, getEnv, envPairs
    const scratchKey = "NIM_TESTS_TOSENV_KEY"
    for newVal in ["val", "", unicodeUtf8]: # the empty value must work too
      doAssert not existsEnv(scratchKey)

      # Create the variable ...
      putEnv(scratchKey, "tempval")
      doAssert existsEnv(scratchKey)
      doAssert getEnv(scratchKey) == "tempval"

      # ... then overwrite the entry that already exists.
      putEnv(scratchKey, newVal)
      doAssert existsEnv(scratchKey)
      doAssert getEnv(scratchKey) == newVal

      # The pair is listed by `envPairs` until it is deleted.
      let listedBefore = toSeq(envPairs())
      doAssert (scratchKey, newVal) in listedBefore
      delEnv(scratchKey)
      let listedAfter = toSeq(envPairs())
      doAssert (scratchKey, newVal) notin listedAfter
      doAssert not existsEnv(scratchKey)

      # Deleting an already deleted variable is not an error.
      delEnv(scratchKey)
      doAssert not existsEnv(scratchKey)

  block:
    # A missing variable yields exactly the supplied default.
    for fallback in ["", " ", "defval"]:
      doAssert getEnv("NIM_TESTS_TOSENV_NONEXISTENT", fallback) == fallback

  whenVMorJs: discard # xxx improve
  do:
    # Names containing '=' and the empty name are rejected with `OSError`
    # and leave nothing behind in the environment.
    for badName in ["NIM_TESTS_TOSENV_PUT=DUMMY_VALUE", ""]:
      doAssertRaises(OSError): putEnv(badName, "NEW_DUMMY_VALUE")
    for absentName in ["",
                       "NIM_TESTS_TOSENV_PUT=DUMMY_VALUE",
                       "NIM_TESTS_TOSENV_PUT"]:
      doAssert not existsEnv(absentName)

main()
# Direct bindings to the C runtime's environment lookups. The tests use them
# to compare what `std/envvars` reports with what the CRT itself returns.
# The wide-character variant is only declared on Windows.
when defined(windows):
  proc c_wgetenv(env: WideCString): WideCString {.
    importc: "_wgetenv", header: "<stdlib.h>".}
proc c_getenv(env: cstring): cstring {.
  importc: "getenv", header: "<stdlib.h>".}
when not defined(js) and not defined(nimscript):
  block: # bug #18533
    # After another thread has called `putEnv`, `getEnv` on this thread must
    # agree with a raw lookup through the C runtime.
    proc setFromThread {.thread.} = putEnv("foo", "fooVal2")
    var worker: Thread[void]

    putEnv("foo", "fooVal1")
    doAssert getEnv("foo") == "fooVal1"
    createThread(worker, setFromThread)
    joinThreads(worker)

    # Use the wide CRT API on Windows and the narrow one elsewhere.
    let crtValue =
      when defined(windows): $c_wgetenv(newWideCString("foo"))
      else: $c_getenv(cstring("foo"))
    doAssert getEnv("foo") == crtValue

    # A name containing '=' cannot be deleted.
    doAssertRaises(OSError, delEnv("foo=bar"))
when defined(windows):
  import std/encodings

  # Raw CRT setters (narrow and wide). Together with `c_getenv` and
  # `c_wgetenv` they play the role of "module X" in the tests below.
  proc c_putenv(env: cstring): int32 {.
    importc: "putenv", header: "<stdlib.h>".}
  proc c_wputenv(env: WideCString): int32 {.
    importc: "_wputenv", header: "<stdlib.h>".}

  block: # Bug #20083
    # These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode
    # characters correctly. This means that module X in the process calling the
    # CRT environment variable API will get the correct string. Raw CRT API
    # calls below represent module X.

    # Getting an env. var. with unicode characters returns the correct UTF-8
    # encoded string.
    block:
      const envName = "twin_envvars1"
      doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
      doAssert existsEnv(envName)
      doAssert getEnv(envName) == unicodeUtf8

    # Putting an env. var. with unicode characters gives the correct UTF-16
    # encoded string from low-level routine.
    block:
      const envName = "twin_envvars2"
      putEnv(envName, unicodeUtf8)
      doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8

    # Env. name containing Unicode characters is retrieved correctly
    block:
      const envName = unicodeUtf8 & "1"
      doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
      doAssert existsEnv(envName)
      doAssert getEnv(envName) == unicodeUtf8

    # Env. name containing Unicode characters is set correctly
    block:
      const envName = unicodeUtf8 & "2"
      putEnv(envName, unicodeUtf8)
      doAssert existsEnv(envName)
      doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8

    # Env. name containing Unicode characters and empty value is set correctly
    block:
      const envName = unicodeUtf8 & "3"
      putEnv(envName, "")
      doAssert existsEnv(envName)
      doAssert $c_wgetenv(envName.newWideCString) == ""

    # It's hard to test on Windows code pages, because there is no "change
    # a process' locale" API. The ANSI checks below therefore only run when
    # the process already uses the windows-1252 code page.
    if getCurrentEncoding(true) == "windows-1252":
      const
        unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding

      # Test that env. var. ANSI API has correct encoding
      block:
        const
          envName = unicodeUtf8 & "4"
          envNameAnsi = unicodeAnsi & "4"
        putEnv(envName, unicodeUtf8)
        doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi

      # A variable set through the narrow CRT API is read back as UTF-8.
      block:
        const
          envName = unicodeUtf8 & "5"
          envNameAnsi = unicodeAnsi & "5"
        doAssert c_putenv((envNameAnsi & "=" & unicodeAnsi).cstring) == 0
        doAssert getEnv(envName) == unicodeUtf8

      # Env. name containing Unicode characters and empty value is set
      # correctly; and, if env. name characters cannot be represented in the
      # code page, don't raise an error.
      #
      # `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var.
      # The windows-1252 code page (the one this branch runs under) has no
      # representation of `abreveUtf8` below, so the conversion will fail,
      # but this must not be fatal. It is expected that the routine ignores
      # updating MBCS environment (`environ` global) and carries on.
      block:
        const
          # "LATIN SMALL LETTER A WITH BREVE" (U+0103) in UTF-8
          abreveUtf8 = "\xc4\x83"
          envName = abreveUtf8 & "6"
        putEnv(envName, "")
        doAssert existsEnv(envName)
        doAssert $c_wgetenv(envName.newWideCString) == ""
        doAssert getEnv(envName) == ""