From 30fb2ed59d8ea2a245339f82cf3a93c0b5fd8adc Mon Sep 17 00:00:00 2001 From: bobsayshilol Date: Sun, 27 Oct 2024 19:42:25 +0000 Subject: [PATCH] Avoid undefined arithmetic shifting The result of a left shift on a positive signed integer (Rune) must fit into an unsigned integer, otherwise it's undefined behaviour, as is left shifting a negative integer by any amount. This code can only be hit if |x >= 0xf0| and hence a left shift of 31 will always be undefined unless the input is 0 or 1. To avoid hitting this we can instead extend the lowest bit to be the mask if we assume that ints are 2's complement, which we already do elsewhere. This generates identical code in testing on Compiler Explorer and the Odin test suite passes locally with this change. Note that the original code would change to be defined behaviour in C++20, however we currently build with |-std=c++14| in the build scripts. --- src/unicode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/unicode.cpp b/src/unicode.cpp index 665d5b182..ef95cde71 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -109,7 +109,7 @@ gb_internal isize utf8_decode(u8 const *str, isize str_len, Rune *codepoint_out) u8 b1, b2, b3; Utf8AcceptRange accept; if (x >= 0xf0) { - Rune mask = (cast(Rune)x << 31) >> 31; + Rune mask = -cast(Rune)(x & 1); codepoint = (cast(Rune)s0 & (~mask)) | (GB_RUNE_INVALID & mask); width = 1; goto end;