Fix MultiByteToWideChar usage in utf8_to_wstring_buf

This commit is contained in:
bplu4t2f
2026-02-25 23:02:14 +01:00
parent bdcb9e3fef
commit 730f417ecb
2 changed files with 127 additions and 14 deletions

View File

@@ -117,23 +117,27 @@ utf8_to_utf16_alloc :: proc(s: string, allocator := context.temp_allocator) -> [
return text[:n]
}
// Converts the UTF-8 string `s` to UTF-16, writing into the caller-provided `buf`.
//
// Returns the prefix of `buf` that was written, or nil when nothing could be
// produced: `buf` is empty, `buf` or `s` is longer than a c_int can describe,
// or MultiByteToWideChar reports failure (buffer too small or invalid input).
// On failure the contents of `buf` may already have been modified.
//
// Does not null-terminate the result.
@(require_results)
utf8_to_utf16_buf :: proc(buf: []u16, s: string) -> []u16 {
	buf_length := len(buf)
	if buf_length == 0 {
		// This case must be handled separately because MultiByteToWideChar would interpret
		// a buffer length of 0 as a request to calculate the required buffer size.
		return nil
	}
	if buf_length > cast(int)max(c_int) || len(s) > cast(int)max(c_int) {
		// Unsupported: both lengths are passed to the API as c_int. Casting a larger
		// value would silently truncate it (and a negative input length would make
		// MultiByteToWideChar treat the input as null-terminated).
		return nil
	}
	elements_written := MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, raw_data(s), c_int(len(s)), raw_data(buf[:]), cast(c_int)buf_length)
	if elements_written == 0 {
		// Insufficient buffer size or invalid characters. Contents of the buffer may have been modified.
		return nil
	}
	return buf[:elements_written]
}
// Procedure group: `utf8_to_utf16(...)` resolves to either utf8_to_utf16_alloc
// or utf8_to_utf16_buf depending on the arguments passed.
utf8_to_utf16 :: proc{utf8_to_utf16_alloc, utf8_to_utf16_buf}
@(require_results)
@@ -146,10 +150,28 @@ utf8_to_wstring_alloc :: proc(s: string, allocator := context.temp_allocator) ->
// Converts the UTF-8 string `s` to a null-terminated UTF-16 string stored in `buf`.
//
// `buf` must have room for the converted text plus one terminating null element.
// Returns a pointer to the start of `buf` on success, or nil when `buf` is empty,
// too small, or the conversion fails. An empty `s` succeeds and yields an empty,
// null-terminated string (as long as `buf` has at least one element).
@(require_results)
utf8_to_wstring_buf :: proc(buf: []u16, s: string) -> wstring {
	buf_length := len(buf)
	if buf_length == 0 {
		// We cannot even provide an empty string with a terminating null character.
		return nil
	}
	if len(s) == 0 {
		// Empty string. Needs special care here because here, an empty string
		// is different from conversion failure.
		buf[0] = 0
		return wstring(raw_data(buf))
	}
	// We will need to append the terminating null character.
	// utf8_to_utf16 does not do that, so reserve the last element for it.
	res := utf8_to_utf16(buf[:buf_length-1], s)
	res_length := len(res)
	if res_length == 0 {
		// Conversion failure.
		return nil
	}
	// `res` is a prefix of buf[:buf_length-1], so the terminator always fits.
	assert(res_length < buf_length)
	buf[res_length] = 0
	return wstring(raw_data(res))
}
// Procedure group: `utf8_to_wstring(...)` resolves to either utf8_to_wstring_alloc
// or utf8_to_wstring_buf depending on the arguments passed.
utf8_to_wstring :: proc{utf8_to_wstring_alloc, utf8_to_wstring_buf}

View File

@@ -31,4 +31,95 @@ utf16_to_utf8_buf_test :: proc(t: ^testing.T) {
res := win32.utf16_to_utf8_buf(buf[:], transmute([]u16)wstr)
testing.expect_value(t, res, test.ustr)
}
}
}
// Checks win32.utf8_to_utf16_buf: element counts and contents for ASCII and
// non-ASCII input, and the nil / zero-length result for a too-short buffer,
// an empty input string, and an empty buffer.
@(test)
utf8_to_utf16_buf_test :: proc(t: ^testing.T) {
	buf : [100]u16 = ---

	// Test everything with a dirty buffer!
	// Every element gets a non-zero value so stale data cannot pass for output.
	reset_buffer :: proc(buf : []u16) {
		for i in 0 ..< len(buf) do buf[i] = cast(u16)(i + 1)
	}

	result : []u16

	// An embedded NUL is counted like any other character.
	reset_buffer(buf[:])
	result = win32.utf8_to_utf16_buf(buf[:], "Hello\x00, World!")
	testing.expect_value(t, len(result), 14)

	// The punctuation is FULLWIDTH COMMA (U+FF0C) and FULLWIDTH EXCLAMATION MARK
	// (U+FF01); written as escapes so it cannot be confused with ASCII ',' / '!'.
	reset_buffer(buf[:])
	result = win32.utf8_to_utf16_buf(buf[:], "你好\uFF0C世界\uFF01")
	testing.expect_value(t, len(result), 6)
	testing.expect_value(t, result[0], 0x4F60)
	testing.expect_value(t, result[1], 0x597D)
	testing.expect_value(t, result[2], 0xFF0C)
	testing.expect_value(t, result[3], 0x4E16)
	testing.expect_value(t, result[4], 0x754C)
	testing.expect_value(t, result[5], 0xFF01)

	reset_buffer(buf[:])
	result = win32.utf8_to_utf16_buf(buf[:4], "Hello")
	// Buffer too short.
	testing.expect(t, result == nil)

	reset_buffer(buf[:])
	result = win32.utf8_to_utf16_buf(buf[:], "")
	// Valid, but indistinguishable from an error.
	testing.expect_value(t, len(result), 0)

	reset_buffer(buf[:])
	result = win32.utf8_to_utf16_buf(buf[:0], "Hello")
	// Buffer too short.
	testing.expect(t, result == nil)
}
// Checks win32.utf8_to_wstring_buf: the converted text is followed by a
// terminating null, a buffer without room for the terminator is rejected,
// a buffer that is exactly large enough is accepted, and an empty input
// yields a valid empty string rather than nil.
@(test)
utf8_to_wstring_buf_test :: proc(t : ^testing.T) {
	buf : [100]u16 = ---

	// Test everything with a dirty buffer!
	// Every element gets a non-zero value, so a 0 read back must have been written
	// by the conversion.
	reset_buffer :: proc(buf : []u16) {
		for i in 0 ..< len(buf) do buf[i] = cast(u16)(i + 1)
	}

	result : win32.wstring

	// 14 characters (including the embedded NUL), terminator at index 14.
	reset_buffer(buf[:])
	result = win32.utf8_to_wstring_buf(buf[:], "Hello\x00, World!")
	testing.expect(t, result != nil)
	testing.expect_value(t, buf[13], '!')
	testing.expect_value(t, buf[14], 0)

	// The punctuation is FULLWIDTH COMMA (U+FF0C) and FULLWIDTH EXCLAMATION MARK
	// (U+FF01); written as escapes so it cannot be confused with ASCII ',' / '!'.
	reset_buffer(buf[:])
	result = win32.utf8_to_wstring_buf(buf[:], "你好\uFF0C世界\uFF01")
	testing.expect(t, result != nil)
	testing.expect_value(t, buf[0], 0x4F60)
	testing.expect_value(t, buf[1], 0x597D)
	testing.expect_value(t, buf[2], 0xFF0C)
	testing.expect_value(t, buf[3], 0x4E16)
	testing.expect_value(t, buf[4], 0x754C)
	testing.expect_value(t, buf[5], 0xFF01)
	testing.expect_value(t, buf[6], 0)

	reset_buffer(buf[:])
	result = win32.utf8_to_wstring_buf(buf[:5], "Hello")
	// Buffer too short.
	testing.expect_value(t, result, nil)

	reset_buffer(buf[:])
	result = win32.utf8_to_wstring_buf(buf[:6], "Hello")
	// Buffer *just* long enough.
	testing.expect(t, result != nil)
	testing.expect_value(t, buf[4], 'o')
	testing.expect_value(t, buf[5], 0)

	reset_buffer(buf[:])
	result = win32.utf8_to_wstring_buf(buf[:], "")
	// Valid, and distinguishable from an error.
	testing.expect(t, result != nil)
	testing.expect_value(t, buf[0], 0)

	reset_buffer(buf[:])
	result = win32.utf8_to_wstring_buf(buf[:0], "Hello")
	// Buffer too short.
	testing.expect(t, result == nil)
}