mirror of
https://github.com/odin-lang/Odin.git
synced 2026-04-06 06:38:20 +00:00
Merge pull request #6336 from bplu4t2f/master
Fix MultiByteToWideChar usage in utf8_to_wstring_buf
This commit is contained in:
@@ -91,67 +91,149 @@ LANGIDFROMLCID :: #force_inline proc "contextless" (lcid: LCID) -> LANGID {
|
||||
|
||||
@(require_results)
|
||||
utf8_to_utf16_alloc :: proc(s: string, allocator := context.temp_allocator) -> []u16 {
|
||||
if len(s) < 1 {
|
||||
s_length := len(s)
|
||||
if s_length < 1 {
|
||||
return nil
|
||||
}
|
||||
if s_length > cast(int)max(c_int) {
|
||||
// Unsupported (input string is excessively long).
|
||||
return nil
|
||||
}
|
||||
|
||||
b := transmute([]byte)s
|
||||
cstr := raw_data(b)
|
||||
n := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(len(s)), nil, 0)
|
||||
if n == 0 {
|
||||
n := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(s_length), nil, 0)
|
||||
if n <= 0 || cast(int)n >= max(int) {
|
||||
// If n is equal to or greater than max(int), then we will not be able
|
||||
// to create a big enough slice with the null terminator.
|
||||
// NOTE: This only affects 32-bit systems and is purely pedantic because
|
||||
// the system will never be able to allocate that much memory.
|
||||
return nil
|
||||
}
|
||||
|
||||
text := make([]u16, n+1, allocator)
|
||||
text := make([]u16, cast(int)n + 1, allocator)
|
||||
if text == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(len(s)), raw_data(text), n)
|
||||
if n1 == 0 {
|
||||
n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(s_length), raw_data(text), n)
|
||||
if n1 <= 0 {
|
||||
delete(text, allocator)
|
||||
return nil
|
||||
}
|
||||
|
||||
// null-terminate the result here, even though the null element is not
|
||||
// part of the slice. This is done to avoid breaking callers which relied on
|
||||
// this behavior, and is also expected by utf8_to_wstring_alloc.
|
||||
text[n] = 0
|
||||
for n >= 1 && text[n-1] == 0 {
|
||||
n -= 1
|
||||
}
|
||||
return text[:n]
|
||||
}
|
||||
|
||||
@(require_results)
|
||||
utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 {
|
||||
n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), nil, 0)
|
||||
if n1 == 0 {
|
||||
buf_length := len(buf)
|
||||
if buf_length < 1 {
|
||||
return nil
|
||||
} else if int(n1) > len(buf) {
|
||||
}
|
||||
s_length := len(s)
|
||||
if s_length == 0 {
|
||||
return nil
|
||||
}
|
||||
if s_length > cast(int)max(c_int) {
|
||||
// Unsupported (input string is excessively long).
|
||||
return nil
|
||||
}
|
||||
if buf_length > cast(int)max(c_int) {
|
||||
buf_length = cast(int)max(c_int)
|
||||
}
|
||||
elements_written := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(s_length), raw_data(buf), cast(c_int)buf_length)
|
||||
if elements_written <= 0 {
|
||||
// Insufficient buffer size, empty input string, or invalid characters. Contents of the buffer may have been modified.
|
||||
return nil
|
||||
}
|
||||
|
||||
n1 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), raw_data(buf[:]), n1)
|
||||
if n1 == 0 {
|
||||
return nil
|
||||
} else if int(n1) > len(buf) {
|
||||
// To be consistent with utf8_to_utf16_alloc, the output string
|
||||
// is null-terminated here in the buffer, even though the terminating null character
|
||||
// is not part of the returned slice.
|
||||
if buf_length <= cast(int)elements_written {
|
||||
// The terminating null character does not fit.
|
||||
// Need at least a length of (elements_written+1).
|
||||
return nil
|
||||
}
|
||||
return buf[:n1]
|
||||
buf[elements_written] = 0
|
||||
return buf[:elements_written]
|
||||
}
|
||||
|
||||
// Converts a regular UTF-8 `string` to UTF-16.
|
||||
//
|
||||
// The conversion includes any null characters present in the input string.
|
||||
//
|
||||
// Returns `nil` on conversion failure.
|
||||
//
|
||||
// Conversion may fail due to an invalid byte sequence in the input string,
|
||||
// or an insufficient buffer size (`utf8_to_utf16_buf` only),
|
||||
// or allocation failure (`utf8_to_utf16_alloc` only).
|
||||
//
|
||||
// The result of converting an empty string is indistinguishable from conversion failure.
|
||||
utf8_to_utf16 :: proc{utf8_to_utf16_alloc, utf8_to_utf16_buf}
|
||||
|
||||
@(require_results)
|
||||
utf8_to_wstring_alloc :: proc(s: string, allocator := context.temp_allocator) -> wstring {
|
||||
if res := utf8_to_utf16(s, allocator); len(res) > 0 {
|
||||
return wstring(raw_data(res))
|
||||
if len(s) == 0 {
|
||||
// Empty string. Needs special care because an empty string
|
||||
// is different from conversion failure.
|
||||
buf := make([]u16, 1, allocator)
|
||||
if buf == nil {
|
||||
return nil
|
||||
}
|
||||
buf[0] = 0
|
||||
return wstring(raw_data(buf))
|
||||
}
|
||||
// utf8_to_utf16 null-terminates the result in the allocated memory block,
|
||||
// however, the null character is not part of the returned slice (it is just beyond).
|
||||
// Converting to wstring drops the slice bounds, so the terminator just past the slice end stays reachable.
|
||||
res := utf8_to_utf16(s, allocator)
|
||||
if len(res) > 0 {
|
||||
return wstring(raw_data(res))
|
||||
} else {
|
||||
// Conversion failure.
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@(require_results)
|
||||
utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring {
|
||||
if res := utf8_to_utf16(buf, s); len(res) > 0 {
|
||||
return wstring(raw_data(res))
|
||||
buf_length := len(buf)
|
||||
if buf_length == 0 {
|
||||
// Insufficient buffer size, even for an empty string.
|
||||
return nil
|
||||
}
|
||||
if len(s) == 0 {
|
||||
// Empty string. Needs special care because an empty string
|
||||
// is different from conversion failure.
|
||||
buf[0] = 0
|
||||
return wstring(raw_data(buf))
|
||||
}
|
||||
// utf8_to_utf16 null-terminates the result in the buffer,
|
||||
// however, the null character is not part of the returned slice (it is just beyond).
|
||||
// Converting to wstring drops the slice bounds, so the terminator just past the slice end stays reachable.
|
||||
res := utf8_to_utf16(buf[:], s)
|
||||
if len(res) > 0 {
|
||||
return wstring(raw_data(res))
|
||||
} else {
|
||||
// Conversion failure.
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Converts a regular UTF-8 `string` to UTF-16, and returns the result as a
|
||||
// null-terminated `wstring`, or `nil` on conversion failure.
|
||||
//
|
||||
// Conversion may fail due to an invalid byte sequence in the input string,
|
||||
// or an insufficient buffer size (`utf8_to_wstring_buf` only),
|
||||
// or allocation failure (`utf8_to_wstring_alloc` only).
|
||||
//
|
||||
// An empty string is valid, and results in a value distinct from `nil`.
|
||||
utf8_to_wstring :: proc{utf8_to_wstring_alloc, utf8_to_wstring_buf}
|
||||
|
||||
@(require_results)
|
||||
|
||||
@@ -4,6 +4,7 @@ package test_core_sys_windows
|
||||
import "base:intrinsics"
|
||||
import "core:testing"
|
||||
import win32 "core:sys/windows"
|
||||
import runtime "base:runtime"
|
||||
|
||||
UTF16_Vector :: struct {
|
||||
wstr: win32.wstring,
|
||||
@@ -31,4 +32,223 @@ utf16_to_utf8_buf_test :: proc(t: ^testing.T) {
|
||||
res := win32.utf16_to_utf8_buf(buf[:], transmute([]u16)wstr)
|
||||
testing.expect_value(t, res, test.ustr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises win32.utf8_to_utf16_buf with embedded null characters, non-ASCII
// (CJK) input, undersized buffers and an empty input string.
@(test)
|
||||
utf8_to_utf16_buf_test :: proc(t: ^testing.T) {
|
||||
buf : [100]u16 = ---
|
||||
// Test everything with a dirty buffer! reset_buffer fills every element with
// a non-zero value (index + 1), so no zero is left over from a previous case.
|
||||
reset_buffer :: proc(buf : []u16) {
|
||||
for i in 0 ..< len(buf) {
|
||||
buf[i] = cast(u16)(i + 1)
|
||||
}
|
||||
}
|
||||
|
||||
result : []u16
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_utf16_buf(buf[:], "Hello\x00, World!")
|
||||
testing.expect_value(t, len(result), 14)
|
||||
testing.expect_value(t, result[4], 'o')
|
||||
testing.expect_value(t, result[5], 0)
|
||||
testing.expect_value(t, result[6], ',')
|
||||
testing.expect_value(t, result[13], '!')
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_utf16_buf(buf[:], "H\x00\x00")
|
||||
testing.expect_value(t, len(result), 3)
|
||||
testing.expect_value(t, result[1], 0)
|
||||
testing.expect_value(t, result[2], 0)
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_utf16_buf(buf[:], "你好,世界!")
|
||||
testing.expect_value(t, len(result), 6)
|
||||
testing.expect_value(t, result[0], 0x4F60)
|
||||
testing.expect_value(t, result[1], 0x597D)
|
||||
testing.expect_value(t, result[2], 0xFF0C)
|
||||
testing.expect_value(t, result[3], 0x4E16)
|
||||
testing.expect_value(t, result[4], 0x754C)
|
||||
testing.expect_value(t, result[5], 0xFF01)
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_utf16_buf(buf[:4], "Hello")
|
||||
// Buffer too short: the 5 code units of "Hello" do not fit into 4 elements.
|
||||
testing.expect(t, result == nil)
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_utf16_buf(buf[:], "")
|
||||
// An empty input is valid, but its empty result is indistinguishable from an error.
|
||||
testing.expect_value(t, len(result), 0)
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_utf16_buf(buf[:0], "Hello")
|
||||
// Buffer too short: a zero-length buffer cannot hold any output.
|
||||
testing.expect(t, result == nil)
|
||||
}
|
||||
|
||||
// Exercises win32.utf8_to_wstring_buf and checks, via the backing buffer, that
// the converted text is followed by a null terminator.
@(test)
|
||||
utf8_to_wstring_buf_test :: proc(t : ^testing.T) {
|
||||
buf : [100]u16 = ---
|
||||
// Test everything with a dirty buffer! reset_buffer fills every element with
// a non-zero value (index + 1), so an expected 0 must have been written by the call.
|
||||
reset_buffer :: proc(buf : []u16) {
|
||||
for i in 0 ..< len(buf) {
|
||||
buf[i] = cast(u16)(i + 1)
|
||||
}
|
||||
}
|
||||
|
||||
result : win32.wstring
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_wstring_buf(buf[:], "Hello\x00, World!")
|
||||
testing.expect(t, result != nil)
|
||||
testing.expect_value(t, buf[13], '!')
|
||||
testing.expect_value(t, buf[14], 0)
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_wstring_buf(buf[:], "H\x00\x00")
|
||||
testing.expect(t, result != nil)
|
||||
testing.expect_value(t, buf[1], 0)
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_wstring_buf(buf[:], "你好,世界!")
|
||||
testing.expect(t, result != nil)
|
||||
testing.expect_value(t, buf[0], 0x4F60)
|
||||
testing.expect_value(t, buf[1], 0x597D)
|
||||
testing.expect_value(t, buf[2], 0xFF0C)
|
||||
testing.expect_value(t, buf[3], 0x4E16)
|
||||
testing.expect_value(t, buf[4], 0x754C)
|
||||
testing.expect_value(t, buf[5], 0xFF01)
|
||||
testing.expect_value(t, buf[6], 0)
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_wstring_buf(buf[:5], "Hello")
|
||||
// Buffer too short: 5 code units plus the null terminator need 6 elements.
|
||||
testing.expect_value(t, result, nil)
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_wstring_buf(buf[:6], "Hello")
|
||||
// Buffer *just* long enough: 5 code units plus the null terminator.
|
||||
testing.expect(t, result != nil)
|
||||
testing.expect_value(t, buf[4], 'o')
|
||||
testing.expect_value(t, buf[5], 0)
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_wstring_buf(buf[:], "")
|
||||
// An empty input is valid, and (being non-nil) distinguishable from an error.
|
||||
testing.expect(t, result != nil)
|
||||
testing.expect_value(t, buf[0], 0)
|
||||
|
||||
reset_buffer(buf[:])
|
||||
result = win32.utf8_to_wstring_buf(buf[:0], "Hello")
|
||||
// Buffer too short: a zero-length buffer cannot hold any output.
|
||||
testing.expect(t, result == nil)
|
||||
}
|
||||
|
||||
// Custom allocator proc that always returns dirty (non-zeroed) memory.
// allocator_data must point to the runtime.Allocator that does the real work;
// the request is forwarded to it unchanged, and whenever it reports no error
// every byte of the returned slice is overwritten with (index + 1).
|
||||
dirty_allocator_proc :: proc(allocator_data: rawptr, mode: runtime.Allocator_Mode,
|
||||
size, alignment: int,
|
||||
old_memory: rawptr, old_size: int,
|
||||
location: runtime.Source_Code_Location = #caller_location) -> ([]byte, runtime.Allocator_Error) {
|
||||
real_allocator := cast(^runtime.Allocator)allocator_data
|
||||
bytes, error := real_allocator.procedure(real_allocator.data, mode,
|
||||
size, alignment,
|
||||
old_memory, old_size,
|
||||
location)
|
||||
if error == .None {
|
||||
for i in 0 ..< len(bytes) {
|
||||
// The cast to byte will yield a 0 byte on overflow (first at index 255), but that does not matter in this test suite.
|
||||
bytes[i] = cast(byte)(i + 1)
|
||||
}
|
||||
}
|
||||
return bytes, error
|
||||
}
|
||||
|
||||
// Exercises win32.utf8_to_utf16_alloc with embedded null characters, non-ASCII
// (CJK) input and an empty input string.
@(test)
|
||||
utf8_to_utf16_alloc_test :: proc(t : ^testing.T) {
|
||||
// We want to ensure that everything works with dirty
|
||||
// (non-zeroed) memory returned from the allocator, so the
// temp allocator is wrapped in dirty_allocator_proc.
|
||||
real_allocator := context.temp_allocator
|
||||
allocator := runtime.Allocator {
|
||||
procedure = dirty_allocator_proc,
|
||||
data = cast(rawptr)&real_allocator,
|
||||
}
|
||||
|
||||
// Test the dirty allocator itself: a fresh allocation must hold the (index + 1) fill pattern.
|
||||
allocator_test_slice := make([]u8, 100, allocator)
|
||||
testing.expect_value(t, len(allocator_test_slice), 100)
|
||||
for i in 0 ..< len(allocator_test_slice) {
|
||||
testing.expect_value(t, allocator_test_slice[i], cast(u8)(i + 1))
|
||||
}
|
||||
|
||||
result : []u16
|
||||
|
||||
result = win32.utf8_to_utf16_alloc("Hello\x00, World!", allocator)
|
||||
testing.expect_value(t, len(result), 14)
|
||||
testing.expect_value(t, result[4], 'o')
|
||||
testing.expect_value(t, result[5], 0)
|
||||
testing.expect_value(t, result[6], ',')
|
||||
testing.expect_value(t, result[13], '!')
|
||||
|
||||
result = win32.utf8_to_utf16_alloc("H\x00\x00", allocator)
|
||||
testing.expect_value(t, len(result), 3)
|
||||
testing.expect_value(t, result[1], 0)
|
||||
testing.expect_value(t, result[2], 0)
|
||||
|
||||
result = win32.utf8_to_utf16_alloc("你好,世界!", allocator)
|
||||
testing.expect_value(t, len(result), 6)
|
||||
testing.expect_value(t, result[0], 0x4F60)
|
||||
testing.expect_value(t, result[1], 0x597D)
|
||||
testing.expect_value(t, result[2], 0xFF0C)
|
||||
testing.expect_value(t, result[3], 0x4E16)
|
||||
testing.expect_value(t, result[4], 0x754C)
|
||||
testing.expect_value(t, result[5], 0xFF01)
|
||||
|
||||
result = win32.utf8_to_utf16_alloc("", allocator)
|
||||
// An empty input is valid, but its empty result is indistinguishable from an error.
|
||||
testing.expect_value(t, len(result), 0)
|
||||
}
|
||||
|
||||
// Exercises win32.utf8_to_wstring_alloc and checks that the returned wstring
// is followed by a null terminator, including for an empty input string.
@(test)
|
||||
utf8_to_wstring_alloc_test :: proc(t : ^testing.T) {
|
||||
// We want to ensure that everything works with dirty
|
||||
// (non-zeroed) memory returned from the allocator, so the
// temp allocator is wrapped in dirty_allocator_proc.
|
||||
backing_allocator := context.temp_allocator
|
||||
allocator := runtime.Allocator {
|
||||
procedure = dirty_allocator_proc,
|
||||
data = cast(rawptr)&backing_allocator,
|
||||
}
|
||||
|
||||
result : win32.wstring
|
||||
// buf views each returned wstring as a multi-pointer so that individual
// code units, including the terminator, can be indexed.
|
||||
buf : [^]u16
|
||||
|
||||
result = win32.utf8_to_wstring_alloc("Hello\x00, World!", allocator)
|
||||
buf = transmute([^]u16)result
|
||||
testing.expect(t, result != nil)
|
||||
testing.expect_value(t, buf[4], 'o')
|
||||
testing.expect_value(t, buf[5], 0)
|
||||
testing.expect_value(t, buf[6], ',')
|
||||
testing.expect_value(t, buf[13], '!')
|
||||
testing.expect_value(t, buf[14], 0)
|
||||
|
||||
result = win32.utf8_to_wstring_alloc("H\x00\x00", allocator)
|
||||
buf = transmute([^]u16)result
|
||||
testing.expect(t, result != nil)
|
||||
testing.expect_value(t, buf[1], 0)
|
||||
|
||||
result = win32.utf8_to_wstring_alloc("你好,世界!", allocator)
|
||||
buf = transmute([^]u16)result
|
||||
testing.expect(t, result != nil)
|
||||
testing.expect_value(t, buf[0], 0x4F60)
|
||||
testing.expect_value(t, buf[1], 0x597D)
|
||||
testing.expect_value(t, buf[2], 0xFF0C)
|
||||
testing.expect_value(t, buf[3], 0x4E16)
|
||||
testing.expect_value(t, buf[4], 0x754C)
|
||||
testing.expect_value(t, buf[5], 0xFF01)
|
||||
testing.expect_value(t, buf[6], 0)
|
||||
|
||||
result = win32.utf8_to_wstring_alloc("", allocator)
|
||||
buf = transmute([^]u16)result
|
||||
// An empty input is valid, and (being non-nil) distinguishable from an error.
|
||||
testing.expect(t, result != nil)
|
||||
testing.expect_value(t, buf[0], 0)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user