diff --git a/src/string.cpp b/src/string.cpp
index 9c08114a7..c26457acf 100644
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -635,26 +635,115 @@ gb_internal String normalize_path(gbAllocator a, String const &path, String cons
 #elif defined(GB_SYSTEM_UNIX) || defined(GB_SYSTEM_OSX)
 	#include <wchar.h>
 
+	// Splits `r` into a UTF-16 surrogate pair: *r1 gets the high surrogate and
+	// *r2 the low one. Only meaningful for r >= 0x10000.
+	gb_internal void utf16_encode_surrogate_pair(Rune r, u16 *r1, u16 *r2) {
+		static Rune const _surr_self = 0x10000;
+		static Rune const _surr1     = 0xd800;
+		static Rune const _surr2     = 0xdc00;
+		Rune r_ = r - _surr_self;
+		*r1 = cast(u16)(_surr1 + ((r_ >> 10) & 0x3ff));
+		*r2 = cast(u16)(_surr2 + (r_ & 0x3ff));
+	}
+
+	// Decodes one rune from the first `n` UTF-16 code units of `s` and returns
+	// the number of units consumed (0 only when n < 1). A lone or truncated
+	// surrogate yields GB_RUNE_INVALID with a width of 1.
+	gb_internal isize utf16_decode(u16 const *s, isize n, Rune *r) {
+		static Rune const _surr1     = 0xd800;  // first high surrogate
+		static Rune const _surr2     = 0xdc00;  // first low surrogate
+		static Rune const _surr3     = 0xe000;  // one past the last surrogate
+		static Rune const _surr_self = 0x10000;
+		if (n < 1) {
+			*r = GB_RUNE_INVALID;
+			return 0;
+		}
+		Rune c = s[0];
+		if (c < _surr1 || c >= _surr3) {
+			*r = c;
+			return 1;
+		}
+		if (c >= _surr2 || n < 2) {
+			// Low surrogate first, or a high surrogate with nothing after it.
+			*r = GB_RUNE_INVALID;
+			return 1;
+		}
+		Rune c2 = s[1];
+		if (c2 < _surr2 || c2 >= _surr3) {
+			*r = GB_RUNE_INVALID;
+			return 1;
+		}
+		*r = (((c - _surr1) << 10) | (c2 - _surr2)) + _surr_self;
+		return 2;
+	}
+
+	// Transcodes `input_length` bytes of UTF-8 into UTF-16 code units, which are
+	// stored through `output` viewed as u16. Returns the number of units needed,
+	// or -1 if the input is malformed or `output_size` units are not enough.
+	// With a NULL `output` nothing is written and only the count is returned.
 	gb_internal int convert_multibyte_to_widechar(char const *multibyte_input, usize input_length, wchar_t *output, usize output_size) {
-		String string = copy_string(heap_allocator(), make_string(cast(u8 const*)multibyte_input, input_length)); /* Guarantee NULL terminator */
-		u8* input = string.text;
+		u16 *out = cast(u16 *)output;
+		String s = make_string(cast(u8 const *)multibyte_input, input_length);
+		isize i = 0;
+		isize output_len = 0;
+		while (i < s.len) {
+			Rune r = 0;
+			isize width = utf8_decode(s.text + i, s.len - i, &r);
+			// GB_RUNE_INVALID is presumably also the code point U+FFFD, whose
+			// UTF-8 form is three bytes long. Assuming utf8_decode reports
+			// malformed input with a width of 1 (TODO confirm), only short
+			// results are errors.
+			if (r == GB_RUNE_INVALID && width <= 1) {
+				return -1;
+			}
+			i += width;
+			isize units = (r < 0x10000) ? 1 : 2;
+			if (out) {
+				if (cast(usize)output_len + cast(usize)units > output_size) {
+					return -1;
+				}
+				if (units == 1) {
+					out[output_len] = cast(u16)r;
+				} else {
+					utf16_encode_surrogate_pair(r, &out[output_len+0], &out[output_len+1]);
+				}
+			}
+			output_len += units;
+		}
 
-		mbstate_t ps = { 0 };
-		size_t result = mbsrtowcs(output, cast(const char**)&input, output_size, &ps);
-
-		gb_free(heap_allocator(), string.text);
-		return (result == (size_t)-1) ? -1 : (int)result;
+		return cast(int)output_len;
 	}
 
+	// Transcodes `input_length` UTF-16 code units (read from `widechar_input`
+	// viewed as u16) into UTF-8 bytes in `output`. Returns the number of bytes
+	// needed, or -1 on an unpaired surrogate or when `output_size` bytes are not
+	// enough. With a NULL `output` nothing is written and only the count is returned.
 	gb_internal int convert_widechar_to_multibyte(wchar_t const *widechar_input, usize input_length, char* output, usize output_size) {
-		String string = copy_string(heap_allocator(), make_string(cast(u8 const*)widechar_input, input_length)); /* Guarantee NULL terminator */
-		u8* input = string.text;
+		u16 const *in = cast(u16 const *)widechar_input;
+		isize input_len = cast(isize)input_length;
+		isize i = 0;
+		isize output_len = 0;
+		while (i < input_len) {
+			Rune r;
+			isize width = utf16_decode(in + i, input_len - i, &r);
+			// A literal U+FFFD unit presumably decodes to GB_RUNE_INVALID too
+			// (TODO confirm the sentinel's value), so the result is only an
+			// error when the unit itself was something else.
+			if (r == GB_RUNE_INVALID && in[i] != cast(u16)GB_RUNE_INVALID) {
+				return -1;
+			}
+			i += width;
 
-		mbstate_t ps = { 0 };
-		size_t result = wcsrtombs(output, cast(const wchar_t**)&input, output_size, &ps);
+			u8 buf[4];
+			isize char_len = gb_utf8_encode_rune(buf, r);
 
-		gb_free(heap_allocator(), string.text);
-		return (result == (size_t)-1) ? -1 : (int)result;
+			if (output) {
+				if (cast(usize)output_len + cast(usize)char_len > output_size) {
+					return -1;
+				}
+				gb_memmove(output + output_len, buf, char_len);
+			}
+			output_len += char_len;
+		}
+		return cast(int)output_len;
 	}
 #else
 #error Implement system
diff --git a/tests/issues/run.bat b/tests/issues/run.bat
index 0ceaf554c..9106a339b 100644
--- a/tests/issues/run.bat
+++ b/tests/issues/run.bat
@@ -27,6 +27,7 @@ set COMMON=-define:ODIN_TEST_FANCY=false -file -vet -strict-style -ignore-unused
 ..\..\..\odin build ..\test_issue_5265.odin %COMMON% || exit /b
 ..\..\..\odin test ..\test_issue_5699.odin %COMMON% || exit /b
 ..\..\..\odin test ..\test_issue_6068.odin %COMMON% || exit /b
+..\..\..\odin test ..\test_issue_6101.odin %COMMON% || exit /b
 
 @echo off
 
diff --git a/tests/issues/run.sh b/tests/issues/run.sh
index ce02ba20a..58382c7ad 100755
--- a/tests/issues/run.sh
+++ b/tests/issues/run.sh
@@ -34,6 +34,7 @@ $ODIN build ../test_issue_5097-2.odin $COMMON
 $ODIN build ../test_issue_5265.odin $COMMON
 $ODIN test ../test_issue_5699.odin $COMMON
 $ODIN test ../test_issue_6068.odin $COMMON
+$ODIN test ../test_issue_6101.odin $COMMON
 
 set +x
 
diff --git a/tests/issues/test_issue_6101.odin b/tests/issues/test_issue_6101.odin
new file mode 100644
index 000000000..9f24ade52
--- /dev/null
+++ b/tests/issues/test_issue_6101.odin
@@ -0,0 +1,23 @@
+// Tests issue #6101 https://github.com/odin-lang/Odin/issues/6101
+package test_issues
+
+import "core:testing"
+
+@(test)
+test_issue_6101_bmp :: proc(t: ^testing.T) {
+	s := string16("\u732b")
+	testing.expect_value(t, len(s), 1)
+
+	u := transmute([]u16)s
+	testing.expect_value(t, u[0], 0x732b)
+}
+
+@(test)
+test_issue_6101_non_bmp :: proc(t: ^testing.T) {
+	s := string16("\U0001F63A")
+	testing.expect_value(t, len(s), 2)
+
+	u := transmute([]u16)s
+	testing.expect_value(t, u[0], 0xD83D)
+	testing.expect_value(t, u[1], 0xDE3A)
+}