From 771d308d643f9e3d59b2451fe18c5bdd845aba1e Mon Sep 17 00:00:00 2001 From: bobsayshilol Date: Sun, 27 Oct 2024 18:11:46 +0000 Subject: [PATCH 1/6] Fix invalid union access UBSan spotted that |src->Basic.kind| had a value outside the range of |BasicKind| due to it actually being a |Type_Pointer|. Since these are stored in a union there could be cases where the value of |kind| just so happens to be |Basic_string|, in which case the branch would have been taken when it shouldn't have been. To fix this simply check that it's a |Type_Basic| before treating it as a |Basic|. --- src/llvm_backend_expr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llvm_backend_expr.cpp b/src/llvm_backend_expr.cpp index de7cadaf3..b5f6437a4 100644 --- a/src/llvm_backend_expr.cpp +++ b/src/llvm_backend_expr.cpp @@ -1647,7 +1647,7 @@ gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t) { lb_emit_store(p, a1, id); return lb_addr_load(p, res); } else if (dst->kind == Type_Basic) { - if (src->Basic.kind == Basic_string && dst->Basic.kind == Basic_cstring) { + if (src->kind == Type_Basic && src->Basic.kind == Basic_string && dst->Basic.kind == Basic_cstring) { String str = lb_get_const_string(m, value); lbValue res = {}; res.type = t; From e67692b0661ab23837079b2ed7340ff85ca88cd6 Mon Sep 17 00:00:00 2001 From: bobsayshilol Date: Sun, 27 Oct 2024 18:35:14 +0000 Subject: [PATCH 2/6] Avoid member access through nullptr in debug If |result_count| is 0 then |results| will be a nullptr and hence the access |results->Tuple| is undefined behaviour. There's already an early return in the 0 branch so move that to be the first thing so that we can guarantee that it's not a nullptr. Note that technically we take the address of the result so it's not actually dereferencing it, however UBSan doesn't care about that. --- src/llvm_backend_stmt.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp index 288e7206a..06d66ac80 100644 --- a/src/llvm_backend_stmt.cpp +++ b/src/llvm_backend_stmt.cpp @@ -2018,14 +2018,7 @@ gb_internal void lb_build_return_stmt_internal(lbProcedure *p, lbValue res) { gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return_results) { lb_ensure_abi_function_type(p->module, p); - lbValue res = {}; - - TypeTuple *tuple = &p->type->Proc.results->Tuple; isize return_count = p->type->Proc.result_count; - isize res_count = return_results.count; - - lbFunctionType *ft = lb_get_function_type(p->module, p->type); - bool return_by_pointer = ft->ret.kind == lbArg_Indirect; if (return_count == 0) { // No return values @@ -2038,7 +2031,17 @@ gb_internal void lb_build_return_stmt(lbProcedure *p, Slice const &return LLVMBuildRetVoid(p->builder); } return; - } else if (return_count == 1) { + } + + lbValue res = {}; + + TypeTuple *tuple = &p->type->Proc.results->Tuple; + isize res_count = return_results.count; + + lbFunctionType *ft = lb_get_function_type(p->module, p->type); + bool return_by_pointer = ft->ret.kind == lbArg_Indirect; + + if (return_count == 1) { Entity *e = tuple->variables[0]; if (res_count == 0) { rw_mutex_shared_lock(&p->module->values_mutex); From 4f800a7fdaaa430226108e3bb2bf9fafbda41be8 Mon Sep 17 00:00:00 2001 From: bobsayshilol Date: Sun, 27 Oct 2024 19:42:25 +0000 Subject: [PATCH 3/6] Avoid undefined arithmetic shifting The result of a left shift on a positive signed integer (Rune) must fit into an unsigned integer otherwise it's undefined behaviour, as is left shifting a negative integer by any amount. This code can only be hit if |x >= 0xf0| and hence a left shift of 31 will always be undefined unless the input is 0 or 1. To avoid hitting this we can instead extend the lowest bit to be the mask if we assume that ints are 2's complement, which we already do elsewhere. This generates identical code in testing on Compiler Explorer and the Odin test suite passes locally with this change. Note that the original code would change to be defined behaviour in C++20, however we are currently build with |-std=c++14| in the build scripts. --- src/unicode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/unicode.cpp b/src/unicode.cpp index 665d5b182..ef95cde71 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -109,7 +109,7 @@ gb_internal isize utf8_decode(u8 const *str, isize str_len, Rune *codepoint_out) u8 b1, b2, b3; Utf8AcceptRange accept; if (x >= 0xf0) { - Rune mask = (cast(Rune)x << 31) >> 31; + Rune mask = -cast(Rune)(x & 1); codepoint = (cast(Rune)s0 & (~mask)) | (GB_RUNE_INVALID & mask); width = 1; goto end; From c1496ab6c055974ac401865bdca87a4fc0a76ea3 Mon Sep 17 00:00:00 2001 From: bobsayshilol Date: Sun, 27 Oct 2024 20:11:42 +0000 Subject: [PATCH 4/6] Fix passing nullptr to args marked as non-null libstdc++'s |memcpy| and |memset| both state that their inputs should never be a nullptr since this matches the C spec. Some compilers act on these hints, so we shouldn't unconditionally call these as it would signal to the compiler that they can't be nullptrs. As an example, the following code will always call |do_something()| when compiled with optimisations since GCC version 4.9: ``` void clear(void *ptr, int size) { memset(ptr, 0, size); } void example(void *ptr, int size) { clear(ptr, size); if (ptr != nullptr) do_something(); } ``` --- src/gb/gb.h | 6 +++++- src/string.cpp | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gb/gb.h b/src/gb/gb.h index 1fef4b4f5..f74026c7d 100644 --- a/src/gb/gb.h +++ b/src/gb/gb.h @@ -2541,7 +2541,11 @@ gb_inline void const *gb_pointer_add_const(void const *ptr, isize bytes) { gb_inline void const *gb_pointer_sub_const(void const *ptr, isize bytes) { return cast(void const *)(cast(u8 const *)ptr - bytes); } gb_inline isize gb_pointer_diff (void const *begin, void const *end) { return cast(isize)(cast(u8 const *)end - cast(u8 const *)begin); } -gb_inline void gb_zero_size(void *ptr, isize size) { memset(ptr, 0, size); } +gb_inline void gb_zero_size(void *ptr, isize size) { + if (size != 0) { + memset(ptr, 0, size); + } +} #if defined(_MSC_VER) && !defined(__clang__) diff --git a/src/string.cpp b/src/string.cpp index 3c7d96934..190b69041 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -156,6 +156,7 @@ gb_internal isize string_index_byte(String const &s, u8 x) { gb_internal gb_inline bool str_eq(String const &a, String const &b) { if (a.len != b.len) return false; + if (a.len == 0) return true; return memcmp(a.text, b.text, a.len) == 0; } gb_internal gb_inline bool str_ne(String const &a, String const &b) { return !str_eq(a, b); } From bb308b3ff4a88fa2ab17aef087ec91b46d9a3768 Mon Sep 17 00:00:00 2001 From: bobsayshilol Date: Sun, 27 Oct 2024 20:23:50 +0000 Subject: [PATCH 5/6] Add missing guards around push/pop pragmas This matches all the other places where we silence Windows warnings. --- src/unicode.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/unicode.cpp b/src/unicode.cpp index ef95cde71..cb9fb12ab 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -1,10 +1,15 @@ -#pragma warning(push) -#pragma warning(disable: 4245) +#if defined(GB_SYSTEM_WINDOWS) + #pragma warning(push) + #pragma warning(disable: 4245) +#endif extern "C" { #include "utf8proc/utf8proc.c" } -#pragma warning(pop) + +#if defined(GB_SYSTEM_WINDOWS) + #pragma warning(pop) +#endif gb_internal bool rune_is_letter(Rune r) { From b59647084b11a7f08ad3aa54ae47ceb157f12c46 Mon Sep 17 00:00:00 2001 From: bobsayshilol Date: Sun, 27 Oct 2024 20:26:34 +0000 Subject: [PATCH 6/6] Plug a memory leak The call to |array_make()| always allocates and since this variable was unused it lead to a leak. Simply plug it by removing it. --- src/parser.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/parser.cpp b/src/parser.cpp index 4029d49de..9d8b0d231 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -4262,8 +4262,6 @@ gb_internal bool allow_field_separator(AstFile *f) { gb_internal Ast *parse_struct_field_list(AstFile *f, isize *name_count_) { Token start_token = f->curr_token; - auto decls = array_make(ast_allocator(f)); - isize total_name_count = 0; Ast *params = parse_field_list(f, &total_name_count, FieldFlag_Struct, Token_CloseBrace, false, false);