mirror of
https://github.com/nim-lang/Nim.git
synced 2026-02-17 16:38:33 +00:00
* Add use of Windows Wide CRT API for env. vars
Replaces use of the CRT APIs `getenv` and `putenv` with `_wgetenv` and
`_wputenv`, respectively. The motivation is to reliably convert environment
variables to UTF-8; the wide API is the best fit for that, because it is
reliably UTF-16.
Changed the hack in `lib/std/private/win_setenv.nim` by switching the
order of the Unicode and MBCS environment update; Unicode first, MBCS
second. Because `_wgetenv`/`_wputenv` is now used, the Unicode
environment will be initialized, so it should always be updated.
Stop updating MBCS environment with the name of `getEnv`. It's not
necessarily true that the MBCS encoding and the `string` encoding are the
same. Instead convert UTF-16 to the current Windows code page with
`wcstombs`, and use that string to update MBCS.
Fixes regression in `6b3c77e` that caused `std/envvars.getEnv` or
`std/os.getEnv` on Windows to return non-UTF-8 encoded strings.
Add tests that test environment variables with Unicode characters in
their name or value.
* Fix test issues
Fixes
* `nim cpp` didn't compile the tests
* Nimscript import of `tosenv.nim` from `test_nimscript.nims` failed
with "cannot importc"
* Fix missing error check on `wcstombs`
* Fix ANSI testing errors
* Separate ANSI-related testing into its own tests, and only execute
them if the running process has a specific code page
* Setting locale with `setlocale` was not reliable and didn't work on
certain machines
* Add handling of a "no character representation" error in second
`wcstombs` call
* tests/newruntime_misc: Increment allocCount
Increments overall allocations in `tnewruntime_misc` test. This is
because `getEnv` now does an additional allocation: allocation of the
UTF-16 string used as parameter to `c_wgetenv`.
* Revert "tests/newruntime_misc: Increment allocCount"
This reverts commit 4d4fe8bd3e.
* tests/newruntime_misc: Increment allocCount on Windows
Increments overall allocations in `tnewruntime_misc` test for Windows.
This is because `getEnv` on Windows now does an additional allocation:
allocation of the UTF-16 string used as parameter to `c_wgetenv`.
* Refactor, adding suggestions from code review
Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
* Document, adding suggestions
Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
Co-authored-by: ringabout <43030857+ringabout@users.noreply.github.com>
Co-authored-by: Clay Sweetser <Varriount@users.noreply.github.com>
157 lines
5.5 KiB
Nim
157 lines
5.5 KiB
Nim
discard """
  matrix: "--threads:on"
  joinable: false
  targets: "c js cpp"
"""

import std/envvars
from std/sequtils import toSeq
import stdtest/testutils

# U+00C6 "LATIN CAPITAL LETTER AE" encoded as UTF-8 (bytes 0xC3 0x86).
# Shared non-ASCII sample used for both names and values in the tests below.
const unicodeUtf8 = "\xc3\x86"

template main =
  ## Core `std/envvars` checks: set / overwrite / delete round-trips,
  ## default values for missing variables, and rejection of invalid names.
  block: # delEnv, existsEnv, getEnv, envPairs
    for val in ["val", "", unicodeUtf8]: # ensures empty val works too
      const key = "NIM_TESTS_TOSENV_KEY"
      # Each iteration must start from a clean slate.
      doAssert not existsEnv(key)

      putEnv(key, "tempval")
      doAssert existsEnv(key)
      doAssert getEnv(key) == "tempval"

      putEnv(key, val) # change a key that already exists
      doAssert existsEnv(key)
      doAssert getEnv(key) == val

      # The pair must be visible through iteration, and gone after deletion.
      doAssert (key, val) in toSeq(envPairs())
      delEnv(key)
      doAssert (key, val) notin toSeq(envPairs())
      doAssert not existsEnv(key)
      delEnv(key) # deleting an already deleted env var
      doAssert not existsEnv(key)

  block:
    # A missing variable yields the supplied default verbatim.
    doAssert getEnv("NIM_TESTS_TOSENV_NONEXISTENT", "") == ""
    doAssert getEnv("NIM_TESTS_TOSENV_NONEXISTENT", " ") == " "
    doAssert getEnv("NIM_TESTS_TOSENV_NONEXISTENT", "defval") == "defval"

  # NOTE(review): `whenVMorJs` comes from `stdtest/testutils`; presumably the
  # first branch is taken on the VM/JS targets and the `do` branch elsewhere.
  whenVMorJs: discard # xxx improve
  do:
    # Names that are empty or contain '=' must be rejected and leave no trace.
    doAssertRaises(OSError, putEnv("NIM_TESTS_TOSENV_PUT=DUMMY_VALUE", "NEW_DUMMY_VALUE"))
    doAssertRaises(OSError, putEnv("", "NEW_DUMMY_VALUE"))
    doAssert not existsEnv("")
    doAssert not existsEnv("NIM_TESTS_TOSENV_PUT=DUMMY_VALUE")
    doAssert not existsEnv("NIM_TESTS_TOSENV_PUT")

main()

# Raw C runtime lookups, used below to cross-check what `std/envvars` reports
# against what the CRT itself returns.
when defined(windows):
  # Wide-character variant, declared only when targeting Windows.
  proc c_wgetenv(env: WideCString): WideCString {.
    importc: "_wgetenv", header: "<stdlib.h>".}

# Kept at top level (not under the `when` above): the non-Windows branch of the
# thread test and the Windows ANSI tests both call it.
proc c_getenv(env: cstring): cstring {.
  importc: "getenv", header: "<stdlib.h>".}

when not defined(js) and not defined(nimscript):
  block: # bug #18533
    # A variable is set on the main thread and then overwritten from a second
    # thread; afterwards `getEnv` must agree with the C runtime's own view.
    var thr: Thread[void]
    proc threadFunc {.thread.} = putEnv("foo", "fooVal2")

    putEnv("foo", "fooVal1")
    doAssert getEnv("foo") == "fooVal1"
    createThread(thr, threadFunc)
    joinThreads(thr)
    when defined(windows):
      doAssert getEnv("foo") == $c_wgetenv("foo".newWideCString)
    else:
      doAssert getEnv("foo") == $c_getenv("foo".cstring)

    # A name containing '=' is invalid for deletion as well.
    doAssertRaises(OSError): delEnv("foo=bar")

when defined(windows):
  import std/encodings

  # Raw CRT setters (narrow and wide) that stand in for "some other module in
  # the process" writing the environment directly.
  proc c_putenv(env: cstring): int32 {.
    importc: "putenv", header: "<stdlib.h>".}
  proc c_wputenv(env: WideCString): int32 {.
    importc: "_wputenv", header: "<stdlib.h>".}

  block: # Bug #20083
    # These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode
    # characters correctly. This means that module X in the process calling the
    # CRT environment variable API will get the correct string. Raw CRT API
    # calls below represent module X.

    # Getting an env. var. with unicode characters returns the correct UTF-8
    # encoded string.
    block:
      const envName = "twin_envvars1"
      doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
      doAssert existsEnv(envName)
      doAssert getEnv(envName) == unicodeUtf8

    # Putting an env. var. with unicode characters gives the correct UTF-16
    # encoded string from low-level routine.
    block:
      const envName = "twin_envvars2"
      putEnv(envName, unicodeUtf8)
      doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8

    # Env. name containing Unicode characters is retrieved correctly
    block:
      const envName = unicodeUtf8 & "1"
      doAssert c_wputenv(newWideCString(envName & "=" & unicodeUtf8)) == 0
      doAssert existsEnv(envName)
      doAssert getEnv(envName) == unicodeUtf8

    # Env. name containing Unicode characters is set correctly
    block:
      const envName = unicodeUtf8 & "2"
      putEnv(envName, unicodeUtf8)
      doAssert existsEnv(envName)
      doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8

    # Env. name containing Unicode characters and empty value is set correctly
    block:
      const envName = unicodeUtf8 & "3"
      putEnv(envName, "")
      doAssert existsEnv(envName)
      doAssert $c_wgetenv(envName.newWideCString) == ""

    # It's hard to test on Windows code pages, because there is no "change
    # a process' locale" API. The ANSI checks therefore only run when the
    # process already uses the code page the expected bytes were written for.
    if getCurrentEncoding(true) == "windows-1252":
      const
        unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding

      # Test that env. var. ANSI API has correct encoding
      block:
        const
          envName = unicodeUtf8 & "4"
          envNameAnsi = unicodeAnsi & "4"
        putEnv(envName, unicodeUtf8)
        doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi

      # Reverse direction: a value written through the narrow CRT API must be
      # read back as UTF-8 by `getEnv`.
      block:
        const
          envName = unicodeUtf8 & "5"
          envNameAnsi = unicodeAnsi & "5"
        doAssert c_putenv((envNameAnsi & "=" & unicodeAnsi).cstring) == 0
        doAssert getEnv(envName) == unicodeUtf8

      # Env. name containing Unicode characters and empty value is set
      # correctly; and, if env. name characters cannot be represented in the
      # code page, don't raise an error.
      #
      # `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var.
      # The windows-1252 code page has no representation of `abreveUtf8` below,
      # so the conversion will fail, but this must not be fatal. It is expected
      # that the routine ignores updating MBCS environment (`environ` global)
      # and carries on.
      block:
        const
          # "LATIN SMALL LETTER A WITH BREVE" in UTF-8
          abreveUtf8 = "\xc4\x83"
          envName = abreveUtf8 & "6"
        putEnv(envName, "")
        doAssert existsEnv(envName)
        doAssert $c_wgetenv(envName.newWideCString) == ""
        doAssert getEnv(envName) == ""