From 730f417ecbbd01b9fb9d1550d061d4ea225b415c Mon Sep 17 00:00:00 2001 From: bplu4t2f Date: Wed, 25 Feb 2026 23:02:14 +0100 Subject: [PATCH 1/7] Fix MultiByteToWideChar usage in utf8_to_wstring_buf --- core/sys/windows/util.odin | 48 ++++++++++++----- tests/core/sys/windows/util.odin | 93 +++++++++++++++++++++++++++++++- 2 files changed, 127 insertions(+), 14 deletions(-) diff --git a/core/sys/windows/util.odin b/core/sys/windows/util.odin index 8cabdeb04..467bab214 100644 --- a/core/sys/windows/util.odin +++ b/core/sys/windows/util.odin @@ -117,23 +117,27 @@ utf8_to_utf16_alloc :: proc(s: string, allocator := context.temp_allocator) -> [ return text[:n] } +// Does not null-terminate the result. @(require_results) utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 { - n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), nil, 0) - if n1 == 0 { - return nil - } else if int(n1) > len(buf) { + buf_length := len(buf) + if buf_length == 0 { + // This case must be handled separately because MultiByteToWideChar would interpret + // a buffer length of 0 as a request to calculate the required buffer size. return nil } - - n1 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), raw_data(buf[:]), n1) - if n1 == 0 { - return nil - } else if int(n1) > len(buf) { + if buf_length > cast(int)max(c_int) { + // Unsupported. return nil } - return buf[:n1] + elements_written := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), raw_data(buf[:]), cast(c_int)buf_length) + if elements_written == 0 { + // Insufficient buffer size or invalid characters. Contents of the buffer may have been modified. 
+ return nil + } + return buf[:elements_written] } + utf8_to_utf16 :: proc{utf8_to_utf16_alloc, utf8_to_utf16_buf} @(require_results) @@ -146,10 +150,28 @@ utf8_to_wstring_alloc :: proc(s: string, allocator := context.temp_allocator) -> @(require_results) utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring { - if res := utf8_to_utf16(buf, s); len(res) > 0 { - return wstring(raw_data(res)) + buf_length := len(buf) + if buf_length == 0 { + // We cannot even provide an empty string with a terminating null character. + return nil } - return nil + if len(s) == 0 { + // Empty string. Needs special care here because here, an empty string + // is different from conversion failure. + buf[0] = 0 + return wstring(raw_data(buf)) + } + // We will need to append the terminating null character. + // utf8_to_utf16 does not do that. + res := utf8_to_utf16(buf[:buf_length-1], s) + res_length := len(res) + if res_length == 0 { + // Conversion failure. + return nil + } + assert(res_length < buf_length) + buf[res_length] = 0 + return wstring(raw_data(res)) } utf8_to_wstring :: proc{utf8_to_wstring_alloc, utf8_to_wstring_buf} diff --git a/tests/core/sys/windows/util.odin b/tests/core/sys/windows/util.odin index e2ab9cde0..2d1bcc0a6 100644 --- a/tests/core/sys/windows/util.odin +++ b/tests/core/sys/windows/util.odin @@ -31,4 +31,95 @@ utf16_to_utf8_buf_test :: proc(t: ^testing.T) { res := win32.utf16_to_utf8_buf(buf[:], transmute([]u16)wstr) testing.expect_value(t, res, test.ustr) } -} \ No newline at end of file +} + +@(test) +utf8_to_utf16_buf_test :: proc(t: ^testing.T) { + buf : [100]u16 = --- + // Test everything with a dirty buffer! 
+ reset_buffer :: proc(buf : []u16) { + for i in 0 ..< len(buf) do buf[i] = cast(u16)(i + 1) + } + + result : []u16 + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:], "Hello\x00, World!") + testing.expect_value(t, len(result), 14) + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:], "你好,世界!") + testing.expect_value(t, len(result), 6) + testing.expect_value(t, result[0], 0x4F60) + testing.expect_value(t, result[1], 0x597D) + testing.expect_value(t, result[2], 0xFF0C) + testing.expect_value(t, result[3], 0x4E16) + testing.expect_value(t, result[4], 0x754C) + testing.expect_value(t, result[5], 0xFF01) + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:4], "Hello") + // Buffer too short. + testing.expect(t, result == nil) + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:], "") + // Valid, but indistinguishable from an error. + testing.expect_value(t, len(result), 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:0], "Hello") + // Buffer too short. + testing.expect(t, result == nil) +} + +@(test) +utf8_to_wstring_buf_test :: proc(t : ^testing.T) { + buf : [100]u16 = --- + // Test everything with a dirty buffer! 
+ reset_buffer :: proc(buf : []u16) { + for i in 0 ..< len(buf) do buf[i] = cast(u16)(i + 1) + } + + result : win32.wstring + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:], "Hello\x00, World!") + testing.expect(t, result != nil) + testing.expect_value(t, buf[13], '!') + testing.expect_value(t, buf[14], 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:], "你好,世界!") + testing.expect(t, result != nil) + testing.expect_value(t, buf[0], 0x4F60) + testing.expect_value(t, buf[1], 0x597D) + testing.expect_value(t, buf[2], 0xFF0C) + testing.expect_value(t, buf[3], 0x4E16) + testing.expect_value(t, buf[4], 0x754C) + testing.expect_value(t, buf[5], 0xFF01) + testing.expect_value(t, buf[6], 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:5], "Hello") + // Buffer too short. + testing.expect_value(t, result, nil) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:6], "Hello") + // Buffer *just* long enough. + testing.expect(t, result != nil) + testing.expect_value(t, buf[4], 'o') + testing.expect_value(t, buf[5], 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:], "") + // Valid, and distinguishable from an error. + testing.expect(t, result != nil) + testing.expect_value(t, buf[0], 0) + + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:0], "Hello") + // Buffer too short. 
+ testing.expect(t, result == nil) +} From 572bebd031a6aa95c48ce11532f6d6c525492fae Mon Sep 17 00:00:00 2001 From: bplu4t2f Date: Thu, 26 Feb 2026 18:11:48 +0100 Subject: [PATCH 2/7] Add tests for win32 string conversion functions --- tests/core/sys/windows/util.odin | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tests/core/sys/windows/util.odin b/tests/core/sys/windows/util.odin index 2d1bcc0a6..638844416 100644 --- a/tests/core/sys/windows/util.odin +++ b/tests/core/sys/windows/util.odin @@ -38,7 +38,9 @@ utf8_to_utf16_buf_test :: proc(t: ^testing.T) { buf : [100]u16 = --- // Test everything with a dirty buffer! reset_buffer :: proc(buf : []u16) { - for i in 0 ..< len(buf) do buf[i] = cast(u16)(i + 1) + for i in 0 ..< len(buf) { + buf[i] = cast(u16)(i + 1) + } } result : []u16 @@ -46,6 +48,16 @@ utf8_to_utf16_buf_test :: proc(t: ^testing.T) { reset_buffer(buf[:]) result = win32.utf8_to_utf16_buf(buf[:], "Hello\x00, World!") testing.expect_value(t, len(result), 14) + testing.expect_value(t, result[4], 'o') + testing.expect_value(t, result[5], 0) + testing.expect_value(t, result[6], ',') + testing.expect_value(t, result[13], '!') + + reset_buffer(buf[:]) + result = win32.utf8_to_utf16_buf(buf[:], "H\x00\x00") + testing.expect_value(t, len(result), 3) + testing.expect_value(t, result[1], 0) + testing.expect_value(t, result[2], 0) reset_buffer(buf[:]) result = win32.utf8_to_utf16_buf(buf[:], "你好,世界!") @@ -78,7 +90,9 @@ utf8_to_wstring_buf_test :: proc(t : ^testing.T) { buf : [100]u16 = --- // Test everything with a dirty buffer! 
reset_buffer :: proc(buf : []u16) { - for i in 0 ..< len(buf) do buf[i] = cast(u16)(i + 1) + for i in 0 ..< len(buf) { + buf[i] = cast(u16)(i + 1) + } } result : win32.wstring @@ -89,6 +103,11 @@ utf8_to_wstring_buf_test :: proc(t : ^testing.T) { testing.expect_value(t, buf[13], '!') testing.expect_value(t, buf[14], 0) + reset_buffer(buf[:]) + result = win32.utf8_to_wstring_buf(buf[:], "H\x00\x00") + testing.expect(t, result != nil) + testing.expect_value(t, buf[1], 0) + reset_buffer(buf[:]) result = win32.utf8_to_wstring_buf(buf[:], "你好,世界!") testing.expect(t, result != nil) From e75d0694ba9570e7be215243bc0b32d20026714d Mon Sep 17 00:00:00 2001 From: bplu4t2f Date: Thu, 26 Feb 2026 18:14:34 +0100 Subject: [PATCH 3/7] win32 utf8_to_utf16_buf now null-terminates because _alloc did too. utf8_to_utf16_alloc no longer truncates trailing zero characters. utf8_to_wstring adjusted accordingly, including edge cases. --- core/sys/windows/util.odin | 62 ++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/core/sys/windows/util.odin b/core/sys/windows/util.odin index 467bab214..51d12d35c 100644 --- a/core/sys/windows/util.odin +++ b/core/sys/windows/util.odin @@ -110,14 +110,13 @@ utf8_to_utf16_alloc :: proc(s: string, allocator := context.temp_allocator) -> [ return nil } + // null-terminate the result here, even though the null element is not + // part of the slice. This is done to avoid breaking callers which relied on + // this behavior, and is also expected by utf8_to_wstring_alloc. text[n] = 0 - for n >= 1 && text[n-1] == 0 { - n -= 1 - } return text[:n] } -// Does not null-terminate the result. @(require_results) utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 { buf_length := len(buf) @@ -127,53 +126,76 @@ utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 { return nil } if buf_length > cast(int)max(c_int) { - // Unsupported. 
- return nil + buf_length = cast(int)max(c_int) } elements_written := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), raw_data(buf[:]), cast(c_int)buf_length) if elements_written == 0 { - // Insufficient buffer size or invalid characters. Contents of the buffer may have been modified. + // Insufficient buffer size, empty input string, or invalid characters. Contents of the buffer may have been modified. return nil } + + // To be consistent with utf8_to_utf16_alloc, the output string + // is null-terminated here in the buffer, even if the terminating null character + // is not part of the returned slice. + if buf_length < cast(int)elements_written + 1 { + // The terminating null character does not fit. + return nil + } + buf[elements_written] = 0 return buf[:elements_written] } +// Converts each UTF-8 code point to UTF-16, including any amount of null characters. +// The resulting slice utf8_to_utf16 :: proc{utf8_to_utf16_alloc, utf8_to_utf16_buf} @(require_results) utf8_to_wstring_alloc :: proc(s: string, allocator := context.temp_allocator) -> wstring { - if res := utf8_to_utf16(s, allocator); len(res) > 0 { - return wstring(raw_data(res)) + if len(s) == 0 { + // Empty string. Needs special care because an empty string + // is different from conversion failure. + buf := make([]u16, 1, allocator) + buf[0] = 0 + return wstring(raw_data(buf)) + } + // utf8_to_utf16_alloc null-terminates the result in the allocated memory block, + // however, the null character is not part of the returned slice (it is just beyond). + // The conversion to wstring will lose this implicit overrun. + res := utf8_to_utf16(s, allocator) + if len(res) > 0 { + return wstring(raw_data(res)) + } else { + // Conversion failure. + return nil } - return nil } @(require_results) utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring { buf_length := len(buf) if buf_length == 0 { - // We cannot even provide an empty string with a terminating null character. 
+ // Insufficient buffer size, even for an empty string. return nil } if len(s) == 0 { - // Empty string. Needs special care here because here, an empty string + // Empty string. Needs special care because an empty string // is different from conversion failure. buf[0] = 0 return wstring(raw_data(buf)) } - // We will need to append the terminating null character. - // utf8_to_utf16 does not do that. - res := utf8_to_utf16(buf[:buf_length-1], s) - res_length := len(res) - if res_length == 0 { + res := utf8_to_utf16(buf[:], s) + if len(res) > 0 { + return wstring(raw_data(res)) + } else { // Conversion failure. return nil } - assert(res_length < buf_length) - buf[res_length] = 0 - return wstring(raw_data(res)) } +// Returns a null-termianted wstring, or nil on conversion failure. +// Conversion failure may happen due to an invalid byte sequence in the input string, +// or an insufficient buffer size (utf8_to_wstring_buf only). +// An empty string is valid, and distinct from nil. utf8_to_wstring :: proc{utf8_to_wstring_alloc, utf8_to_wstring_buf} @(require_results) From 1c09a2be55935619a6da59336e1c7e45393bcb9e Mon Sep 17 00:00:00 2001 From: bplu4t2f Date: Thu, 26 Feb 2026 20:49:36 +0100 Subject: [PATCH 4/7] Add tests for utf8_to_utf16_alloc, utf8_to_wstring_alloc --- tests/core/sys/windows/util.odin | 110 +++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/tests/core/sys/windows/util.odin b/tests/core/sys/windows/util.odin index 638844416..777f85776 100644 --- a/tests/core/sys/windows/util.odin +++ b/tests/core/sys/windows/util.odin @@ -4,6 +4,7 @@ package test_core_sys_windows import "base:intrinsics" import "core:testing" import win32 "core:sys/windows" +import runtime "base:runtime" UTF16_Vector :: struct { wstr: win32.wstring, @@ -142,3 +143,112 @@ utf8_to_wstring_buf_test :: proc(t : ^testing.T) { // Buffer too short. testing.expect(t, result == nil) } + +// Custom allocator proc that always returns dirty (non-zeroed) memory. 
+dirty_allocator_proc :: proc(allocator_data: rawptr, mode: runtime.Allocator_Mode, + size, alignment: int, + old_memory: rawptr, old_size: int, + location: runtime.Source_Code_Location = #caller_location) -> ([]byte, runtime.Allocator_Error) { + real_allocator := cast(^runtime.Allocator)allocator_data + bytes, error := real_allocator.procedure(real_allocator.data, mode, + size, alignment, + old_memory, old_size, + location) + if error == .None { + for i in 0 ..< len(bytes) { + // This will yield a 0 byte on overflow, but that does not matter in this test suite. + bytes[i] = cast(byte)(i + 1) + } + } + return bytes, error +} + +@(test) +utf8_to_utf16_alloc_test :: proc(t : ^testing.T) { + // We want to ensure that everything works with dirty + // (non-zeroed) memory returned from the allocator. + real_allocator := context.temp_allocator + allocator := runtime.Allocator { + procedure = dirty_allocator_proc, + data = cast(rawptr)&real_allocator, + } + + // Test the dirty allocator. + allocator_test_slice := make([]u8, 100, allocator) + testing.expect_value(t, len(allocator_test_slice), 100) + for i in 0 ..< len(allocator_test_slice) { + testing.expect_value(t, allocator_test_slice[i], cast(u8)(i + 1)) + } + + result : []u16 + + result = win32.utf8_to_utf16_alloc("Hello\x00, World!", allocator) + testing.expect_value(t, len(result), 14) + testing.expect_value(t, result[4], 'o') + testing.expect_value(t, result[5], 0) + testing.expect_value(t, result[6], ',') + testing.expect_value(t, result[13], '!') + + result = win32.utf8_to_utf16_alloc("H\x00\x00", allocator) + testing.expect_value(t, len(result), 3) + testing.expect_value(t, result[1], 0) + testing.expect_value(t, result[2], 0) + + result = win32.utf8_to_utf16_alloc("你好,世界!", allocator) + testing.expect_value(t, len(result), 6) + testing.expect_value(t, result[0], 0x4F60) + testing.expect_value(t, result[1], 0x597D) + testing.expect_value(t, result[2], 0xFF0C) + testing.expect_value(t, result[3], 0x4E16) + 
testing.expect_value(t, result[4], 0x754C) + testing.expect_value(t, result[5], 0xFF01) + + result = win32.utf8_to_utf16_alloc("", allocator) + // Valid, but indistinguishable from an error. + testing.expect_value(t, len(result), 0) +} + +@(test) +utf8_to_wstring_alloc_test :: proc(t : ^testing.T) { + // We want to ensure that everything works with dirty + // (non-zeroed) memory returned from the allocator. + backing_allocator := context.temp_allocator + allocator := runtime.Allocator { + procedure = dirty_allocator_proc, + data = cast(rawptr)&backing_allocator, + } + + result : win32.wstring + buf : [^]u16 + + result = win32.utf8_to_wstring_alloc("Hello\x00, World!", allocator) + buf = transmute([^]u16)result + testing.expect(t, result != nil) + testing.expect_value(t, buf[4], 'o') + testing.expect_value(t, buf[5], 0) + testing.expect_value(t, buf[6], ',') + testing.expect_value(t, buf[13], '!') + testing.expect_value(t, buf[14], 0) + + result = win32.utf8_to_wstring_alloc("H\x00\x00", allocator) + buf = transmute([^]u16)result + testing.expect(t, result != nil) + testing.expect_value(t, buf[1], 0) + + result = win32.utf8_to_wstring_alloc("你好,世界!", allocator) + buf = transmute([^]u16)result + testing.expect(t, result != nil) + testing.expect_value(t, buf[0], 0x4F60) + testing.expect_value(t, buf[1], 0x597D) + testing.expect_value(t, buf[2], 0xFF0C) + testing.expect_value(t, buf[3], 0x4E16) + testing.expect_value(t, buf[4], 0x754C) + testing.expect_value(t, buf[5], 0xFF01) + testing.expect_value(t, buf[6], 0) + + result = win32.utf8_to_wstring_alloc("", allocator) + buf = transmute([^]u16)result + // Valid, and distinguishable from an error. 
+ testing.expect(t, result != nil) + testing.expect_value(t, buf[0], 0) +} From fa3441936cf0d3ca13295db33fca8cb217a0b6bc Mon Sep 17 00:00:00 2001 From: bplu4t2f Date: Thu, 26 Feb 2026 20:52:00 +0100 Subject: [PATCH 5/7] Fix utf8_to_utf16_alloc edge case, and improve utf8_to_* documentation --- core/sys/windows/util.odin | 44 +++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/core/sys/windows/util.odin b/core/sys/windows/util.odin index 51d12d35c..2c2b8a1e4 100644 --- a/core/sys/windows/util.odin +++ b/core/sys/windows/util.odin @@ -121,21 +121,27 @@ utf8_to_utf16_alloc :: proc(s: string, allocator := context.temp_allocator) -> [ utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 { buf_length := len(buf) if buf_length == 0 { - // This case must be handled separately because MultiByteToWideChar would interpret - // a buffer length of 0 as a request to calculate the required buffer size. + return nil + } + s_length := len(s) + if s_length == 0 { + return nil + } + if s_length > cast(int)max(c_int) { + // Unsupported (input string is excessively long). return nil } if buf_length > cast(int)max(c_int) { buf_length = cast(int)max(c_int) } - elements_written := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), raw_data(buf[:]), cast(c_int)buf_length) + elements_written := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(s_length), raw_data(buf), cast(c_int)buf_length) if elements_written == 0 { // Insufficient buffer size, empty input string, or invalid characters. Contents of the buffer may have been modified. return nil } // To be consistent with utf8_to_utf16_alloc, the output string - // is null-terminated here in the buffer, even if the terminating null character + // is null-terminated here in the buffer, even though the terminating null character // is not part of the returned slice. 
if buf_length < cast(int)elements_written + 1 { // The terminating null character does not fit. @@ -145,8 +151,16 @@ utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 { return buf[:elements_written] } -// Converts each UTF-8 code point to UTF-16, including any amount of null characters. -// The resulting slice +// Converts a regular UTF-8 `string` to UTF-16. +// +// The conversion includes any null characters present in the input string. +// +// Returns `nil` on conversion failure. +// +// Conversion may fail due to an invalid byte sequence in the input string, +// or an insufficient buffer size (`utf8_to_utf16_buf` only). +// +// The result of converting an empty string is indistinguishable from conversion failure. utf8_to_utf16 :: proc{utf8_to_utf16_alloc, utf8_to_utf16_buf} @(require_results) @@ -158,9 +172,9 @@ utf8_to_wstring_alloc :: proc(s: string, allocator := context.temp_allocator) -> buf[0] = 0 return wstring(raw_data(buf)) } - // utf8_to_utf16_alloc null-terminates the result in the allocated memory block, + // utf8_to_utf16 null-terminates the result in the allocated memory block, // however, the null character is not part of the returned slice (it is just beyond). - // The conversion to wstring will lose this implicit overrun. + // The conversion to wstring will bypass this implicit overrun. res := utf8_to_utf16(s, allocator) if len(res) > 0 { return wstring(raw_data(res)) @@ -183,6 +197,9 @@ utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring { buf[0] = 0 return wstring(raw_data(buf)) } + // utf8_to_utf16 null-terminates the result in the buffer, + // however, the null character is not part of the returned slice (it is just beyond). + // The conversion to wstring will bypass this implicit overrun. res := utf8_to_utf16(buf[:], s) if len(res) > 0 { return wstring(raw_data(res)) @@ -192,10 +209,13 @@ utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring { } } -// Returns a null-termianted wstring, or nil on conversion failure. 
-// Conversion failure may happen due to an invalid byte sequence in the input string, -// or an insufficient buffer size (utf8_to_wstring_buf only). -// An empty string is valid, and distinct from nil. +// Converts a regular UTF-8 `string` to UTF-16, and returns the result as a +// null-terminated `wstring`, or `nil` on conversion failure. +// +// Conversion may fail due to an invalid byte sequence in the input string, +// or an insufficient buffer size (`utf8_to_wstring_buf` only). +// +// An empty string is valid, and results in a value distinct from `nil`. utf8_to_wstring :: proc{utf8_to_wstring_alloc, utf8_to_wstring_buf} @(require_results) From 9883e4ec291e425bd1738dff40e5fb4466d4d2aa Mon Sep 17 00:00:00 2001 From: bplu4t2f Date: Fri, 27 Feb 2026 19:16:34 +0100 Subject: [PATCH 6/7] Handle edge cases: Allocation error, extremely long input --- core/sys/windows/util.odin | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/core/sys/windows/util.odin b/core/sys/windows/util.odin index 2c2b8a1e4..88ffce05a 100644 --- a/core/sys/windows/util.odin +++ b/core/sys/windows/util.odin @@ -91,20 +91,28 @@ LANGIDFROMLCID :: #force_inline proc "contextless" (lcid: LCID) -> LANGID { @(require_results) utf8_to_utf16_alloc :: proc(s: string, allocator := context.temp_allocator) -> []u16 { - if len(s) < 1 { + s_length := len(s) + if s_length < 1 { + return nil + } + if s_length > cast(int)max(c_int) { + // Unsupported (input string is excessively long). 
return nil } b := transmute([]byte)s cstr := raw_data(b) - n := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(len(s)), nil, 0) + n := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(s_length), nil, 0) if n == 0 { return nil } text := make([]u16, n+1, allocator) + if text == nil { + return nil + } - n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(len(s)), raw_data(text), n) + n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(s_length), raw_data(text), n) if n1 == 0 { delete(text, allocator) return nil @@ -158,7 +166,8 @@ utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 { // Returns `nil` on conversion failure. // // Conversion may fail due to an invalid byte sequence in the input string, -// or an insufficient buffer size (`utf8_to_utf16_buf` only). +// or an insufficient buffer size (`utf8_to_utf16_buf` only), +// or allocation failure (`utf8_to_utf16_alloc` only). // // The result of converting an empty string is indistinguishable from conversion failure. utf8_to_utf16 :: proc{utf8_to_utf16_alloc, utf8_to_utf16_buf} @@ -169,6 +178,9 @@ utf8_to_wstring_alloc :: proc(s: string, allocator := context.temp_allocator) -> // Empty string. Needs special care because an empty string // is different from conversion failure. buf := make([]u16, 1, allocator) + if buf == nil { + return nil + } buf[0] = 0 return wstring(raw_data(buf)) } @@ -213,7 +225,8 @@ utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring { // null-terminated `wstring`, or `nil` on conversion failure. // // Conversion may fail due to an invalid byte sequence in the input string, -// or an insufficient buffer size (`utf8_to_wstring_buf` only). +// or an insufficient buffer size (`utf8_to_wstring_buf` only), +// or allocation failure (`utf8_to_wstring_alloc` only). // // An empty string is valid, and results in a value distinct from `nil`. 
utf8_to_wstring :: proc{utf8_to_wstring_alloc, utf8_to_wstring_buf} From 1e0f5a691cf4682507853e2305df213183692751 Mon Sep 17 00:00:00 2001 From: bplu4t2f Date: Sat, 28 Feb 2026 11:39:12 +0100 Subject: [PATCH 7/7] Fix overflow edge cases on 32-bit systems. --- core/sys/windows/util.odin | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/core/sys/windows/util.odin b/core/sys/windows/util.odin index 88ffce05a..542d2718e 100644 --- a/core/sys/windows/util.odin +++ b/core/sys/windows/util.odin @@ -103,17 +103,21 @@ utf8_to_utf16_alloc :: proc(s: string, allocator := context.temp_allocator) -> [ b := transmute([]byte)s cstr := raw_data(b) n := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(s_length), nil, 0) - if n == 0 { + if n <= 0 || cast(int)n >= max(int) { + // If n is equal to or greater than max(int), then we will not be able + // to create a big enough slice with the null terminator. + // NOTE: This only affects 32-bit systems and is purely pedantic because + // the system will never be able to allocate that much memory. 
return nil } - text := make([]u16, n+1, allocator) + text := make([]u16, cast(int)n + 1, allocator) if text == nil { return nil } n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, cstr, c_int(s_length), raw_data(text), n) - if n1 == 0 { + if n1 <= 0 { delete(text, allocator) return nil } @@ -128,7 +132,7 @@ utf8_to_utf16_alloc :: proc(s: string, allocator := context.temp_allocator) -> [ @(require_results) utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 { buf_length := len(buf) - if buf_length == 0 { + if buf_length < 1 { return nil } s_length := len(s) @@ -143,7 +147,7 @@ utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 { buf_length = cast(int)max(c_int) } elements_written := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(s_length), raw_data(buf), cast(c_int)buf_length) - if elements_written == 0 { + if elements_written <= 0 { // Insufficient buffer size, empty input string, or invalid characters. Contents of the buffer may have been modified. return nil } @@ -151,8 +155,9 @@ utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 { // To be consistent with utf8_to_utf16_alloc, the output string // is null-terminated here in the buffer, even though the terminating null character // is not part of the returned slice. - if buf_length < cast(int)elements_written + 1 { + if buf_length <= cast(int)elements_written { // The terminating null character does not fit. + // Need at least a length of (elements_written+1). return nil } buf[elements_written] = 0