diff --git a/core/sys/windows/util.odin b/core/sys/windows/util.odin index 8cabdeb04..542d2718e 100644 --- a/core/sys/windows/util.odin +++ b/core/sys/windows/util.odin @@ -91,67 +91,149 @@ LANGIDFROMLCID :: #force_inline proc "contextless" (lcid: LCID) -> LANGID { @(require_results) utf8_to_utf16_alloc :: proc(s: string, allocator := context.temp_allocator) -> []u16 { - if len(s) < 1 { + s_length := len(s) + if s_length < 1 { + return nil + } + if s_length > cast(int)max(c_int) { + // Unsupported (input string is excessively long). return nil } b := transmute([]byte)s cstr := raw_data(b) - n := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(len(s)), nil, 0) - if n == 0 { + n := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(s_length), nil, 0) + if n <= 0 || cast(int)n >= max(int) { + // If n is equal to or greater than max(int), then we will not be able + // to create a big enough slice with the null terminator. + // NOTE: This only affects 32-bit systems and is purely pedantic because + // the system will never be able to allocate that much memory. return nil } - text := make([]u16, n+1, allocator) + text := make([]u16, cast(int)n + 1, allocator) + if text == nil { + return nil + } - n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(len(s)), raw_data(text), n) - if n1 == 0 { + n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(s_length), raw_data(text), n) + if n1 <= 0 { delete(text, allocator) return nil } + // Null-terminate the result here, even though the null element is not + // part of the slice. This is done to avoid breaking callers which relied on + // this behavior, and is also expected by utf8_to_wstring_alloc. 
text[n] = 0 - for n >= 1 && text[n-1] == 0 { - n -= 1 - } return text[:n] } @(require_results) utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 { - n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), nil, 0) - if n1 == 0 { + buf_length := len(buf) + if buf_length < 1 { return nil - } else if int(n1) > len(buf) { + } + s_length := len(s) + if s_length == 0 { + return nil + } + if s_length > cast(int)max(c_int) { + // Unsupported (input string is excessively long). + return nil + } + if buf_length > cast(int)max(c_int) { + buf_length = cast(int)max(c_int) + } + elements_written := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(s_length), raw_data(buf), cast(c_int)buf_length) + if elements_written <= 0 { + // Insufficient buffer size or invalid characters (empty input was already rejected above). Contents of the buffer may have been modified. return nil } - n1 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), raw_data(buf[:]), n1) - if n1 == 0 { - return nil - } else if int(n1) > len(buf) { + // To be consistent with utf8_to_utf16_alloc, the output string + // is null-terminated here in the buffer, even though the terminating null character + // is not part of the returned slice. + if buf_length <= cast(int)elements_written { + // The terminating null character does not fit. + // Need at least a length of (elements_written+1). return nil } - return buf[:n1] + buf[elements_written] = 0 + return buf[:elements_written] } + +// Converts a regular UTF-8 `string` to UTF-16. +// +// The conversion includes any null characters present in the input string. +// +// Returns `nil` on conversion failure. +// +// Conversion may fail due to an invalid byte sequence in the input string, +// or an insufficient buffer size (`utf8_to_utf16_buf` only; it must also hold the terminating null), +// or allocation failure (`utf8_to_utf16_alloc` only). +// +// The result of converting an empty string is indistinguishable from conversion failure. 
utf8_to_utf16 :: proc{utf8_to_utf16_alloc, utf8_to_utf16_buf} @(require_results) utf8_to_wstring_alloc :: proc(s: string, allocator := context.temp_allocator) -> wstring { - if res := utf8_to_utf16(s, allocator); len(res) > 0 { - return wstring(raw_data(res)) + if len(s) == 0 { + // Empty string. Handled here because utf8_to_utf16 returns nil for it, + // which would be indistinguishable from conversion failure. + buf := make([]u16, 1, allocator) + if buf == nil { + return nil + } + buf[0] = 0 + return wstring(raw_data(buf)) + } + // utf8_to_utf16 null-terminates the result in the allocated memory block, + // however, the null character is not part of the returned slice (it is just beyond). + // Converting to wstring discards the slice length, so the terminator just beyond it stays reachable. + res := utf8_to_utf16(s, allocator) + if len(res) > 0 { + return wstring(raw_data(res)) + } else { + // Conversion failure. + return nil } - return nil } @(require_results) utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring { - if res := utf8_to_utf16(buf, s); len(res) > 0 { - return wstring(raw_data(res)) + buf_length := len(buf) + if buf_length == 0 { + // Insufficient buffer size, even for an empty string. + return nil + } + if len(s) == 0 { + // Empty string. Handled here because utf8_to_utf16 returns nil for it, + // which would be indistinguishable from conversion failure. + buf[0] = 0 + return wstring(raw_data(buf)) + } + // utf8_to_utf16 null-terminates the result in the buffer, + // however, the null character is not part of the returned slice (it is just beyond). + // Converting to wstring discards the slice length, so the terminator just beyond it stays reachable. + res := utf8_to_utf16(buf[:], s) + if len(res) > 0 { + return wstring(raw_data(res)) + } else { + // Conversion failure. + return nil } - return nil } +// Converts a regular UTF-8 `string` to UTF-16, and returns the result as a +// null-terminated `wstring`, or `nil` on conversion failure. 
+// +// Conversion may fail due to an invalid byte sequence in the input string, +// or an insufficient buffer size (`utf8_to_wstring_buf` only), +// or allocation failure (`utf8_to_wstring_alloc` only). +// +// An empty string is valid, and results in a value distinct from `nil`. utf8_to_wstring :: proc{utf8_to_wstring_alloc, utf8_to_wstring_buf} @(require_results) diff --git a/tests/core/sys/windows/util.odin b/tests/core/sys/windows/util.odin index e2ab9cde0..777f85776 100644 --- a/tests/core/sys/windows/util.odin +++ b/tests/core/sys/windows/util.odin @@ -4,6 +4,7 @@ package test_core_sys_windows import "base:intrinsics" import "core:testing" import win32 "core:sys/windows" +import runtime "base:runtime" UTF16_Vector :: struct { wstr: win32.wstring, @@ -31,4 +32,223 @@ utf16_to_utf8_buf_test :: proc(t: ^testing.T) { res := win32.utf16_to_utf8_buf(buf[:], transmute([]u16)wstr) testing.expect_value(t, res, test.ustr) } -} \ No newline at end of file +} + +@(test) +utf8_to_utf16_buf_test :: proc(t: ^testing.T) { + buf : [100]u16 = --- + // Every call below starts from a dirty (non-zero) buffer, so a missing write shows up. 
+ reset_buffer :: proc(buf : []u16) { + for i in 0 ..< len(buf) { + buf[i] = cast(u16)(i + 1) + } + } + + result : []u16 + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:], "Hello\x00, World!") + testing.expect_value(t, len(result), 14) + testing.expect_value(t, result[4], 'o') + testing.expect_value(t, result[5], 0) + testing.expect_value(t, result[6], ',') + testing.expect_value(t, result[13], '!') + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:], "H\x00\x00") + testing.expect_value(t, len(result), 3) + testing.expect_value(t, result[1], 0) + testing.expect_value(t, result[2], 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:], "你好,世界!") + testing.expect_value(t, len(result), 6) + testing.expect_value(t, result[0], 0x4F60) + testing.expect_value(t, result[1], 0x597D) + testing.expect_value(t, result[2], 0xFF0C) + testing.expect_value(t, result[3], 0x4E16) + testing.expect_value(t, result[4], 0x754C) + testing.expect_value(t, result[5], 0xFF01) + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:4], "Hello") + // Buffer too short: "Hello" needs 5 elements plus the terminator. + testing.expect(t, result == nil) + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:], "") + // Valid, but indistinguishable from an error. + testing.expect_value(t, len(result), 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:0], "Hello") + // Empty buffer: rejected before any conversion is attempted. + testing.expect(t, result == nil) +} + +@(test) +utf8_to_wstring_buf_test :: proc(t : ^testing.T) { + buf : [100]u16 = --- + // Every call below starts from a dirty (non-zero) buffer, so a missing terminator shows up. 
+ reset_buffer :: proc(buf : []u16) { + for i in 0 ..< len(buf) { + buf[i] = cast(u16)(i + 1) + } + } + + result : win32.wstring + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:], "Hello\x00, World!") + testing.expect(t, result != nil) + testing.expect_value(t, buf[13], '!') + testing.expect_value(t, buf[14], 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:], "H\x00\x00") + testing.expect(t, result != nil) + testing.expect_value(t, buf[1], 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:], "你好,世界!") + testing.expect(t, result != nil) + testing.expect_value(t, buf[0], 0x4F60) + testing.expect_value(t, buf[1], 0x597D) + testing.expect_value(t, buf[2], 0xFF0C) + testing.expect_value(t, buf[3], 0x4E16) + testing.expect_value(t, buf[4], 0x754C) + testing.expect_value(t, buf[5], 0xFF01) + testing.expect_value(t, buf[6], 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:5], "Hello") + // Buffer too short: the 5 elements fit, but the null terminator does not. + testing.expect_value(t, result, nil) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:6], "Hello") + // Buffer *just* long enough: 5 elements plus the null terminator. + testing.expect(t, result != nil) + testing.expect_value(t, buf[4], 'o') + testing.expect_value(t, buf[5], 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:], "") + // Valid, and distinguishable from an error. + testing.expect(t, result != nil) + testing.expect_value(t, buf[0], 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:0], "Hello") + // Empty buffer: there is no room even for a terminator. + testing.expect(t, result == nil) +} + +// Custom allocator proc that always returns dirty (non-zeroed) memory. 
+dirty_allocator_proc :: proc(allocator_data: rawptr, mode: runtime.Allocator_Mode, + size, alignment: int, + old_memory: rawptr, old_size: int, + location: runtime.Source_Code_Location = #caller_location) -> ([]byte, runtime.Allocator_Error) { + real_allocator := cast(^runtime.Allocator)allocator_data + bytes, error := real_allocator.procedure(real_allocator.data, mode, + size, alignment, + old_memory, old_size, + location) + if error == .None { + for i in 0 ..< len(bytes) { + // The cast wraps, so every 256th byte is 0; that does not matter in this test suite. + bytes[i] = cast(byte)(i + 1) + } + } + return bytes, error +} + +@(test) +utf8_to_utf16_alloc_test :: proc(t : ^testing.T) { + // We want to ensure that everything works with dirty + // (non-zeroed) memory returned from the allocator. + real_allocator := context.temp_allocator + allocator := runtime.Allocator { + procedure = dirty_allocator_proc, + data = cast(rawptr)&real_allocator, + } + + // Sanity check: the dirty allocator must hand back the non-zero byte pattern. + allocator_test_slice := make([]u8, 100, allocator) + testing.expect_value(t, len(allocator_test_slice), 100) + for i in 0 ..< len(allocator_test_slice) { + testing.expect_value(t, allocator_test_slice[i], cast(u8)(i + 1)) + } + + result : []u16 + + result = win32.utf8_to_utf16_alloc("Hello\x00, World!", allocator) + testing.expect_value(t, len(result), 14) + testing.expect_value(t, result[4], 'o') + testing.expect_value(t, result[5], 0) + testing.expect_value(t, result[6], ',') + testing.expect_value(t, result[13], '!') + + result = win32.utf8_to_utf16_alloc("H\x00\x00", allocator) + testing.expect_value(t, len(result), 3) + testing.expect_value(t, result[1], 0) + testing.expect_value(t, result[2], 0) + + result = win32.utf8_to_utf16_alloc("你好,世界!", allocator) + testing.expect_value(t, len(result), 6) + testing.expect_value(t, result[0], 0x4F60) + testing.expect_value(t, result[1], 0x597D) + testing.expect_value(t, result[2], 0xFF0C) + testing.expect_value(t, result[3], 0x4E16) + 
testing.expect_value(t, result[4], 0x754C) + testing.expect_value(t, result[5], 0xFF01) + + result = win32.utf8_to_utf16_alloc("", allocator) + // Valid, but indistinguishable from an error (both yield an empty result). + testing.expect_value(t, len(result), 0) +} + +@(test) +utf8_to_wstring_alloc_test :: proc(t : ^testing.T) { + // We want to ensure that everything works with dirty + // (non-zeroed) memory returned from the allocator. + backing_allocator := context.temp_allocator + allocator := runtime.Allocator { + procedure = dirty_allocator_proc, + data = cast(rawptr)&backing_allocator, + } + + result : win32.wstring + buf : [^]u16 + + result = win32.utf8_to_wstring_alloc("Hello\x00, World!", allocator) + buf = transmute([^]u16)result + testing.expect(t, result != nil) + testing.expect_value(t, buf[4], 'o') + testing.expect_value(t, buf[5], 0) + testing.expect_value(t, buf[6], ',') + testing.expect_value(t, buf[13], '!') + testing.expect_value(t, buf[14], 0) + + result = win32.utf8_to_wstring_alloc("H\x00\x00", allocator) + buf = transmute([^]u16)result + testing.expect(t, result != nil) + testing.expect_value(t, buf[1], 0) + + result = win32.utf8_to_wstring_alloc("你好,世界!", allocator) + buf = transmute([^]u16)result + testing.expect(t, result != nil) + testing.expect_value(t, buf[0], 0x4F60) + testing.expect_value(t, buf[1], 0x597D) + testing.expect_value(t, buf[2], 0xFF0C) + testing.expect_value(t, buf[3], 0x4E16) + testing.expect_value(t, buf[4], 0x754C) + testing.expect_value(t, buf[5], 0xFF01) + testing.expect_value(t, buf[6], 0) + + result = win32.utf8_to_wstring_alloc("", allocator) + buf = transmute([^]u16)result + // Valid, and distinguishable from an error: a non-nil pointer to a lone terminator. + testing.expect(t, result != nil) + testing.expect_value(t, buf[0], 0) +}