Fix ANSI testing errors

* Separate ANSI-related testing to their own tests, and only executing
  them if running process has a specific code page
  * Setting locale with `setlocale` was not reliable and didn't work on
    certain machines
* Add handling of a "no character representation" error in second
  `wcstombs` call
This commit is contained in:
Håvard Mjaavatten
2022-07-26 01:46:23 +02:00
parent a78ac38cee
commit 35727a28b1
3 changed files with 93 additions and 71 deletions

View File

@@ -81,18 +81,18 @@ else:
# wcstombs returns (size_t) (-1) if any characters cannot be represented
# in the current codepage. Skip updating MBCS environment in this case.
# For some reason, second `wcstombs` can find non-convertible characters
# that the first `wcstombs` cannot.
let requiredSizeS = wcstombs(nil, wideName, 0)
if requiredSizeS != high(csize_t):
let requiredSize = requiredSizeS.int
var buf = newSeq[char](requiredSize + 1)
let buf2 = buf[0].addr
if wcstombs(buf2, wideName, csize_t(requiredSize + 1)) == csize_t(high(uint)):
errno = EINVAL
return -1
var ptrToEnv = c_getenv(buf2)
ptrToEnv[0] = '\0'
ptrToEnv = c_getenv(buf2)
ptrToEnv[1] = '='
if wcstombs(buf2, wideName, csize_t(requiredSize + 1)) != high(csize_t):
var ptrToEnv = c_getenv(buf2)
ptrToEnv[0] = '\0'
ptrToEnv = c_getenv(buf2)
ptrToEnv[1] = '='
# And now, we have to update the outer environment to have a proper empty value.
if setEnvironmentVariableW(wideName, value.newWideCString) == 0:

View File

@@ -68,15 +68,10 @@ when not defined(js) and not defined(nimscript):
doAssertRaises(OSError): delEnv("foo=bar")
when defined(windows):
const
LC_ALL = 0
unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding
import std/encodings
proc c_putenv(env: cstring): int32 {.importc: "putenv", header: "<stdlib.h>".}
proc c_wputenv(env: WideCString): int32 {.importc: "_wputenv", header: "<stdlib.h>".}
proc setlocale(category: cint, locale: cstring): cstring {.importc, header: "<locale.h>".}
# Set locale required to represent `unicodeAnsi`
discard setlocale(LC_ALL, cstring"English_United States.1252")
block: # Bug #20083
# These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode
@@ -98,7 +93,6 @@ when defined(windows):
const envName = "twin_envvars2"
putEnv(envName, unicodeUtf8)
doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
doAssert $c_getenv(envName) == unicodeAnsi
# Env. name containing Unicode characters is retrieved correctly
block:
@@ -109,37 +103,54 @@ when defined(windows):
# Env. name containing Unicode characters is set correctly
block:
const
envName = unicodeUtf8 & "2"
envNameAnsi = unicodeAnsi & "2"
const envName = unicodeUtf8 & "2"
putEnv(envName, unicodeUtf8)
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi
# Env. name containing Unicode characters and empty value is set correctly
block:
const
envName = unicodeUtf8 & "3"
envNameAnsi = unicodeAnsi & "3"
const envName = unicodeUtf8 & "3"
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert $c_getenv(envNameAnsi.cstring) == ""
# Env. name containing Unicode characters and empty value is set correctly;
# and, if env. name. characters cannot be represented in codepage, don't
# raise an error.
#
# `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The
# Polish_Poland.1250 locale has no representation of `unicodeUtf8`, so the
# conversion will fail, but this must not be fatal. It is expected that the
# routine ignores updating MBCS environment (`environ` global) and carries
# on.
block:
const envName = unicodeUtf8 & "4"
discard setlocale(LC_ALL, cstring"Polish_Poland.1250")
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert getEnv(envName) == ""
# It's hard to test on Windows code pages, because there is no "change
# a process' locale" API.
if getCurrentEncoding(true) == "windows-1252":
const
unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding
# Test that env. var. ANSI API has correct encoding
block:
const
envName = unicodeUtf8 & "4"
envNameAnsi = unicodeAnsi & "4"
putEnv(envName, unicodeUtf8)
doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi
block:
const
envName = unicodeUtf8 & "5"
envNameAnsi = unicodeAnsi & "5"
doAssert c_putenv((envNameAnsi & "=" & unicodeAnsi).cstring) == 0
doAssert getEnv(envName) == unicodeUtf8
# Env. name containing Unicode characters and empty value is set correctly;
# and, if env. name. characters cannot be represented in codepage, don't
# raise an error.
#
# `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The
# windows-1250 locale has no representation of `abreveUtf8` below, so the
# conversion will fail, but this must not be fatal. It is expected that the
# routine ignores updating MBCS environment (`environ` global) and carries
# on.
block:
const
# "LATIN SMALL LETTER A WITH BREVE" in UTF-8
abreveUtf8 = "\xc4\x83"
envName = abreveUtf8 & "6"
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert getEnv(envName) == ""

View File

@@ -69,15 +69,10 @@ when not defined(js) and not defined(nimscript):
doAssertRaises(OSError): delEnv("foo=bar")
when defined(windows) and not defined(nimscript):
const
LC_ALL = 0
unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding
import std/encodings
proc c_putenv(env: cstring): int32 {.importc: "putenv", header: "<stdlib.h>".}
proc c_wputenv(env: WideCString): int32 {.importc: "_wputenv", header: "<stdlib.h>".}
proc setlocale(category: cint, locale: cstring): cstring {.importc, header: "<locale.h>".}
# Set locale required to represent `unicodeAnsi`
discard setlocale(LC_ALL, cstring"English_United States.1252")
block: # Bug #20083
# These test that `getEnv`, `putEnv` and `existsEnv` handle Unicode
@@ -99,7 +94,6 @@ when defined(windows) and not defined(nimscript):
const envName = "twin_envvars2"
putEnv(envName, unicodeUtf8)
doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
doAssert $c_getenv(envName) == unicodeAnsi
# Env. name containing Unicode characters is retrieved correctly
block:
@@ -110,37 +104,54 @@ when defined(windows) and not defined(nimscript):
# Env. name containing Unicode characters is set correctly
block:
const
envName = unicodeUtf8 & "2"
envNameAnsi = unicodeAnsi & "2"
const envName = unicodeUtf8 & "2"
putEnv(envName, unicodeUtf8)
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == unicodeUtf8
doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi
# Env. name containing Unicode characters and empty value is set correctly
block:
const
envName = unicodeUtf8 & "3"
envNameAnsi = unicodeAnsi & "3"
const envName = unicodeUtf8 & "3"
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert $c_getenv(envNameAnsi.cstring) == ""
# Env. name containing Unicode characters and empty value is set correctly;
# and, if env. name. characters cannot be represented in codepage, don't
# raise an error.
#
# `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The
# Polish_Poland.1250 locale has no representation of `unicodeUtf8`, so the
# conversion will fail, but this must not be fatal. It is expected that the
# routine ignores updating MBCS environment (`environ` global) and carries
# on.
block:
const envName = unicodeUtf8 & "4"
discard setlocale(LC_ALL, cstring"Polish_Poland.1250")
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert getEnv(envName) == ""
# It's hard to test on Windows code pages, because there is no "change
# a process' locale" API.
if getCurrentEncoding(true) == "windows-1252":
const
unicodeAnsi = "\xc6" # `unicodeUtf8` in `windows-1252` encoding
# Test that env. var. ANSI API has correct encoding
block:
const
envName = unicodeUtf8 & "4"
envNameAnsi = unicodeAnsi & "4"
putEnv(envName, unicodeUtf8)
doAssert $c_getenv(envNameAnsi.cstring) == unicodeAnsi
block:
const
envName = unicodeUtf8 & "5"
envNameAnsi = unicodeAnsi & "5"
doAssert c_putenv((envNameAnsi & "=" & unicodeAnsi).cstring) == 0
doAssert getEnv(envName) == unicodeUtf8
# Env. name containing Unicode characters and empty value is set correctly;
# and, if env. name. characters cannot be represented in codepage, don't
# raise an error.
#
# `win_setenv.nim` converts UTF-16 to ANSI when setting empty env. var. The
# windows-1250 locale has no representation of `abreveUtf8` below, so the
# conversion will fail, but this must not be fatal. It is expected that the
# routine ignores updating MBCS environment (`environ` global) and carries
# on.
block:
const
# "LATIN SMALL LETTER A WITH BREVE" in UTF-8
abreveUtf8 = "\xc4\x83"
envName = abreveUtf8 & "6"
putEnv(envName, "")
doAssert existsEnv(envName)
doAssert $c_wgetenv(envName.newWideCString) == ""
doAssert getEnv(envName) == ""