diff --git a/core/sys/windows/util.odin b/core/sys/windows/util.odin
index 8cabdeb04..467bab214 100644
--- a/core/sys/windows/util.odin
+++ b/core/sys/windows/util.odin
@@ -117,23 +117,30 @@ utf8_to_utf16_alloc :: proc(s: string, allocator := context.temp_allocator) -> [
 	return text[:n]
 }
 
+// Converts the UTF-8 string `s` to UTF-16, writing the code units into `buf`.
+// Returns the written prefix of `buf`, or nil if `buf` is empty, if either length does not
+// fit in a c_int, or if MultiByteToWideChar reports failure by returning 0.
+// Does not null-terminate the result.
 @(require_results)
 utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 {
-	n1 := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), nil, 0)
-	if n1 == 0 {
-		return nil
-	} else if int(n1) > len(buf) {
+	buf_length := len(buf)
+	if buf_length == 0 {
+		// This case must be handled separately because MultiByteToWideChar would interpret
+		// a buffer length of 0 as a request to calculate the required buffer size.
 		return nil
 	}
-
-	n1 = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), raw_data(buf[:]), n1)
-	if n1 == 0 {
-		return nil
-	} else if int(n1) > len(buf) {
+	if buf_length > cast(int)max(c_int) || len(s) > cast(int)max(c_int) {
+		// Unsupported: both lengths are passed to MultiByteToWideChar as c_int.
 		return nil
 	}
-	return buf[:n1]
+	elements_written := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), raw_data(buf[:]), cast(c_int)buf_length)
+	if elements_written == 0 {
+		// Insufficient buffer size or invalid characters. Contents of the buffer may have been modified.
+		return nil
+	}
+	return buf[:elements_written]
 }
+
 utf8_to_utf16 :: proc{utf8_to_utf16_alloc, utf8_to_utf16_buf}
 
 @(require_results)
@@ -146,10 +153,31 @@ utf8_to_wstring_alloc :: proc(s: string, allocator := context.temp_allocator) ->
 
+// Converts the UTF-8 string `s` to a null-terminated UTF-16 string stored in `buf`.
+// Returns nil if `buf` is empty, or if the conversion into all but the last element of `buf` fails.
+// An empty `s` yields a valid empty wstring (buf[0] == 0) whenever `buf` is non-empty.
 @(require_results)
 utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring {
-	if res := utf8_to_utf16(buf, s); len(res) > 0 {
-		return wstring(raw_data(res))
+	buf_length := len(buf)
+	if buf_length == 0 {
+		// We cannot even provide an empty string with a terminating null character.
+		return nil
 	}
-	return nil
+	if len(s) == 0 {
+		// Empty string. Handled separately because utf8_to_utf16 returns an empty result
+		// for it, which would be indistinguishable from a conversion failure.
+		buf[0] = 0
+		return wstring(raw_data(buf))
+	}
+	// We will need to append the terminating null character.
+	// utf8_to_utf16 does not do that.
+	res := utf8_to_utf16(buf[:buf_length-1], s)
+	res_length := len(res)
+	if res_length == 0 {
+		// Conversion failure.
+		return nil
+	}
+	assert(res_length < buf_length)
+	buf[res_length] = 0
+	return wstring(raw_data(res))
 }
 
 utf8_to_wstring :: proc{utf8_to_wstring_alloc, utf8_to_wstring_buf}
diff --git a/tests/core/sys/windows/util.odin b/tests/core/sys/windows/util.odin
index e2ab9cde0..2d1bcc0a6 100644
--- a/tests/core/sys/windows/util.odin
+++ b/tests/core/sys/windows/util.odin
@@ -31,4 +31,95 @@ utf16_to_utf8_buf_test :: proc(t: ^testing.T) {
 		res := win32.utf16_to_utf8_buf(buf[:], transmute([]u16)wstr)
 		testing.expect_value(t, res, test.ustr)
 	}
-}
\ No newline at end of file
+}
+
+@(test)
+utf8_to_utf16_buf_test :: proc(t: ^testing.T) {
+	buf : [100]u16 = ---
+	// Test everything with a dirty buffer!
+	reset_buffer :: proc(buf : []u16) {
+		for i in 0 ..< len(buf) do buf[i] = cast(u16)(i + 1)
+	}
+
+	result : []u16
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_utf16_buf(buf[:], "Hello\x00, World!")
+	testing.expect_value(t, len(result), 14)
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_utf16_buf(buf[:], "你好,世界!")
+	testing.expect_value(t, len(result), 6)
+	testing.expect_value(t, result[0], 0x4F60)
+	testing.expect_value(t, result[1], 0x597D)
+	testing.expect_value(t, result[2], 0xFF0C)
+	testing.expect_value(t, result[3], 0x4E16)
+	testing.expect_value(t, result[4], 0x754C)
+	testing.expect_value(t, result[5], 0xFF01)
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_utf16_buf(buf[:4], "Hello")
+	// Buffer too short.
+	testing.expect(t, result == nil)
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_utf16_buf(buf[:], "")
+	// Valid, but indistinguishable from an error.
+	testing.expect_value(t, len(result), 0)
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_utf16_buf(buf[:0], "Hello")
+	// Buffer too short.
+	testing.expect(t, result == nil)
+}
+
+@(test)
+utf8_to_wstring_buf_test :: proc(t : ^testing.T) {
+	buf : [100]u16 = ---
+	// Test everything with a dirty buffer!
+	reset_buffer :: proc(buf : []u16) {
+		for i in 0 ..< len(buf) do buf[i] = cast(u16)(i + 1)
+	}
+
+	result : win32.wstring
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_wstring_buf(buf[:], "Hello\x00, World!")
+	testing.expect(t, result != nil)
+	testing.expect_value(t, buf[13], '!')
+	testing.expect_value(t, buf[14], 0)
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_wstring_buf(buf[:], "你好,世界!")
+	testing.expect(t, result != nil)
+	testing.expect_value(t, buf[0], 0x4F60)
+	testing.expect_value(t, buf[1], 0x597D)
+	testing.expect_value(t, buf[2], 0xFF0C)
+	testing.expect_value(t, buf[3], 0x4E16)
+	testing.expect_value(t, buf[4], 0x754C)
+	testing.expect_value(t, buf[5], 0xFF01)
+	testing.expect_value(t, buf[6], 0)
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_wstring_buf(buf[:5], "Hello")
+	// Buffer too short.
+	testing.expect_value(t, result, nil)
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_wstring_buf(buf[:6], "Hello")
+	// Buffer *just* long enough.
+	testing.expect(t, result != nil)
+	testing.expect_value(t, buf[4], 'o')
+	testing.expect_value(t, buf[5], 0)
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_wstring_buf(buf[:], "")
+	// Valid, and distinguishable from an error.
+	testing.expect(t, result != nil)
+	testing.expect_value(t, buf[0], 0)
+
+	reset_buffer(buf[:])
+	result = win32.utf8_to_wstring_buf(buf[:0], "Hello")
+	// Buffer too short.
+	testing.expect(t, result == nil)
+}