From 5d165f70e1a9c5d554e87ccee373fdeb5395d15a Mon Sep 17 00:00:00 2001 From: gingerBill Date: Sun, 15 Mar 2026 21:53:38 +0000 Subject: [PATCH] Add loads of RVO optimizations for basic 1-value return cases --- src/llvm_backend.hpp | 6 +- src/llvm_backend_proc.cpp | 19 ++-- src/llvm_backend_stmt.cpp | 201 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 216 insertions(+), 10 deletions(-) diff --git a/src/llvm_backend.hpp b/src/llvm_backend.hpp index 48c5be546..7b7d5cc31 100644 --- a/src/llvm_backend.hpp +++ b/src/llvm_backend.hpp @@ -359,6 +359,7 @@ struct lbProcedure { std::atomic is_done; lbAddr return_ptr; + Entity * sret_rvo_entity; // Local aliases of `return_ptr` Array defer_stmts; Array blocks; Array branch_blocks; @@ -486,7 +487,7 @@ gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlo gb_internal void lb_emit_defer_stmts(lbProcedure *p, lbDeferExitKind kind, lbBlock *block, Ast *node); gb_internal lbValue lb_emit_transmute(lbProcedure *p, lbValue value, Type *t); gb_internal lbValue lb_emit_comp(lbProcedure *p, TokenKind op_kind, lbValue left, lbValue right); -gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array const &args, ProcInlining inlining = ProcInlining_none, ProcTailing tailing = ProcTailing_none); +gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array const &args, ProcInlining inlining = ProcInlining_none, ProcTailing tailing = ProcTailing_none, lbValue *sret_dst = nullptr); gb_internal lbValue lb_emit_conv(lbProcedure *p, lbValue value, Type *t); gb_internal lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind, lbValue x); @@ -494,11 +495,10 @@ gb_internal void lb_emit_jump(lbProcedure *p, lbBlock *target_block); gb_internal void lb_emit_if(lbProcedure *p, lbValue cond, lbBlock *true_block, lbBlock *false_block); gb_internal void lb_start_block(lbProcedure *p, lbBlock *b); -gb_internal lbValue lb_build_call_expr(lbProcedure *p, Ast *expr); +gb_internal lbValue 
lb_build_call_expr(lbProcedure *p, Ast *expr, lbValue *sret_dst = nullptr); gb_internal lbProcedure *lb_create_dummy_procedure(lbModule *m, String link_name, Type *type); gb_internal void lb_begin_procedure_body(lbProcedure *p); gb_internal void lb_end_procedure_body(lbProcedure *p); -gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array const &args, ProcInlining inlining); gb_internal lbAddr lb_find_or_generate_context_ptr(lbProcedure *p); gb_internal lbContextData *lb_push_context_onto_stack(lbProcedure *p, lbAddr ctx); diff --git a/src/llvm_backend_proc.cpp b/src/llvm_backend_proc.cpp index 4033c12e8..88165f4d2 100644 --- a/src/llvm_backend_proc.cpp +++ b/src/llvm_backend_proc.cpp @@ -1073,7 +1073,7 @@ gb_internal lbValue lb_emit_conjugate(lbProcedure *p, lbValue val, Type *type) { return lb_emit_load(p, res); } -gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array const &args, ProcInlining inlining, ProcTailing tailing) { +gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array const &args, ProcInlining inlining, ProcTailing tailing, lbValue *sret_dst) { lbModule *m = p->module; Type *pt = base_type(value.type); @@ -1195,7 +1195,12 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array c } if (return_by_pointer) { - lbValue return_ptr = lb_add_local_generated(p, rt, true).addr; + lbValue return_ptr = {}; + if (sret_dst != nullptr) { + return_ptr = *sret_dst; + } else { + return_ptr = lb_add_local_generated(p, rt, true).addr; + } lb_emit_call_internal(p, value, return_ptr, processed_args, nullptr, context_ptr, inlining, tailing); result = lb_emit_load(p, return_ptr); } else if (rt != nullptr) { @@ -4126,13 +4131,13 @@ gb_internal lbValue lb_handle_param_value(lbProcedure *p, Type *parameter_type, } -gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr); +gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr, lbValue *sret_dst = nullptr); -gb_internal lbValue 
lb_build_call_expr(lbProcedure *p, Ast *expr) { +gb_internal lbValue lb_build_call_expr(lbProcedure *p, Ast *expr, lbValue *sret_dst) { expr = unparen_expr(expr); ast_node(ce, CallExpr, expr); - lbValue res = lb_build_call_expr_internal(p, expr); + lbValue res = lb_build_call_expr_internal(p, expr, sret_dst); if (ce->optional_ok_one) { GB_ASSERT(is_type_tuple(res.type)); @@ -4153,7 +4158,7 @@ gb_internal void lb_add_values_to_array(lbProcedure *p, Array *args, lb } } -gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { +gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr, lbValue *sret_dst) { lbModule *m = p->module; TypeAndValue tv = type_and_value_of_expr(expr); @@ -4479,6 +4484,6 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) { } } - return lb_emit_call(p, value, call_args, inlining, tailing); + return lb_emit_call(p, value, call_args, inlining, tailing, sret_dst); } diff --git a/src/llvm_backend_stmt.cpp b/src/llvm_backend_stmt.cpp index 77e5ec0d1..80d52ef5b 100644 --- a/src/llvm_backend_stmt.cpp +++ b/src/llvm_backend_stmt.cpp @@ -1,3 +1,127 @@ +#define LB_ENABLE_RVO true + +// NOTE(bill): @RVO Check if a call expression returns by sret with a return type matching dst_type. +// Returns the callee's function type if eligible for copy elision, nullptr otherwise. 
+gb_internal lbFunctionType *lb_call_sret_eligible(lbProcedure *p, Ast *call_expr, Type *dst_type) {
+	GB_ASSERT(call_expr->kind == Ast_CallExpr);
+	// A callee expression that names a type or a builtin is rejected outright
+	TypeAndValue callee_tv = type_and_value_of_expr(unparen_expr(call_expr->CallExpr.proc));
+	bool is_type_or_builtin = callee_tv.mode == Addressing_Type || callee_tv.mode == Addressing_Builtin;
+	if (is_type_or_builtin) {
+		return nullptr;
+	}
+	// Only procedure types which declare results can qualify
+	Type *callee_type = base_type(callee_tv.type);
+	bool has_results = callee_type != nullptr && callee_type->kind == Type_Proc && callee_type->Proc.results != nullptr;
+	if (!has_results) {
+		return nullptr;
+	}
+	// The callee's function type must return indirectly (`lbArg_Indirect`) ...
+	lbFunctionType *ft = lb_get_function_type(p->module, callee_type);
+	bool returns_indirectly = ft->ret.kind == lbArg_Indirect;
+	// ... and its single result type must be identical to `dst_type`
+	Type *single_ret = returns_indirectly ? reduce_tuple_to_single_type(callee_type->Proc.results) : nullptr;
+	return (single_ret != nullptr && are_types_identical(dst_type, single_ret)) ? ft : nullptr;
+}
+
+// NOTE(bill): @RVO `sret` scan for `x := call(); ...; return x` pattern.
+// When matched, `x` alloca will be the sret pointer itself, eliminating
+// the copy from `x` to the sret buffer on return.
+gb_internal void lb_scan_for_sret_rvo(lbProcedure *p) {
+	// Sets `p->sret_rvo_entity` when the body matches the pattern; otherwise it is left untouched
+	if (p->body == nullptr || p->body->kind != Ast_BlockStmt) {
+		return;
+	}
+	// The procedure itself must have exactly one result which is returned indirectly
+	Type *proc_type = p->type;
+	if (proc_type->Proc.result_count != 1 || proc_type->Proc.results == nullptr) {
+		return;
+	}
+	lbFunctionType *ft = lb_get_function_type(p->module, proc_type);
+	if (ft->ret.kind != lbArg_Indirect) {
+		return;
+	}
+
+	Slice<Ast *> stmts = p->body->BlockStmt.stmts;
+	if (stmts.count < 2) {
+		return;
+	}
+
+	// Last stmt must be `return x` where x is an identifier
+	Ast *last = stmts[stmts.count - 1];
+	if (last->kind != Ast_ReturnStmt) {
+		return;
+	}
+	Slice<Ast *> results = last->ReturnStmt.results;
+	if (results.count != 1) {
+		return;
+	}
+	Ast *ret_expr = unparen_expr(results[0]);
+	if (ret_expr->kind != Ast_Ident) {
+		return;
+	}
+	Entity *ret_entity = entity_of_node(ret_expr);
+	if (ret_entity == nullptr || ret_entity->kind != Entity_Variable) {
+		return;
+	}
+
+	// `x` must have exactly the type of the procedure's single result
+	Type *ret_type = reduce_tuple_to_single_type(proc_type->Proc.results);
+	if (ret_type == nullptr || !are_types_identical(ret_entity->type, ret_type)) {
+		return;
+	}
+
+	// Walk backwards from the second-to-last stmt to find `x := call()`
+	// Everything between must be safe (no reassignment of x, no control flow)
+	// `p->sret_rvo_entity` is only set if that declaration is reached and is eligible
+	for (i64 i = stmts.count - 2; i >= 0; i--) {
+		Ast *stmt = stmts[i];
+		switch (stmt->kind) {
+		case Ast_ValueDecl: {
+			AstValueDecl *vd = &stmt->ValueDecl;
+			if (!vd->is_mutable) {
+				// constant decl - safe, keep scanning
+				continue;
+			}
+			if (vd->names.count == 1 && vd->values.count == 1) {
+				Entity *e = entity_of_node(vd->names[0]);
+				if (e == ret_entity) {
+					// This is the declaration of `x`: the scan ends here whether or not it qualifies
+					Ast *rhs = unparen_expr(vd->values[0]);
+					if (rhs->kind == Ast_CallExpr && lb_call_sret_eligible(p, rhs, e->type)) {
+						p->sret_rvo_entity = ret_entity;
+					}
+					return;
+				}
+			}
+			// Some other mutable ValueDecl - safe as long as it doesn't involve ret_entity
+			for (Ast *name : vd->names) {
+				if (entity_of_node(name) == ret_entity) {
+					return;
+				}
+			}
+			continue;
+		}
+		case Ast_ExprStmt:
+			// Expression statements (reads, function calls) are safe
+			continue;
+		case Ast_AssignStmt: {
+			// Check if any lhs is our entity
+			for (Ast *lhs : stmt->AssignStmt.lhs) {
+				Ast *l = unparen_expr(lhs);
+				if (l->kind == Ast_Ident && entity_of_node(l) == ret_entity) {
+					return;
+				}
+			}
+			continue;
+		}
+		default:
+			// Control flow or anything else - bail
+			return;
+		}
+	}
+}
+
 gb_internal void lb_build_constant_value_decl(lbProcedure *p, AstValueDecl *vd) {
 	if (vd == nullptr || vd->is_mutable) {
 		return;
@@ -2438,6 +2562,45 @@ gb_internal void lb_build_return_stmt(lbProcedure *p, Slice<Ast *> const &return
 
 	if (return_count == 1) {
 		Entity *e = tuple->variables[0];
+
+		if (LB_ENABLE_RVO && res_count == 1 && return_by_pointer) {
+			Ast *ret_expr = unparen_expr(return_results[0]);
+
+			// NOTE(bill): @RVO for `return call()` in a procedure which uses `sret` and has no defers
+			// This forwards the sret pointer directly to the callee
+			if (p->defer_stmts.count == 0) {
+				if (ret_expr->kind == Ast_CallExpr && lb_call_sret_eligible(p, ret_expr, e->type)) {
+					lbValue sret_ptr = p->return_ptr.addr;
+					lb_build_call_expr(p, ret_expr, &sret_ptr);
+					if (p->type->Proc.has_named_results && e->token.string != "") {
+						res = lb_emit_load(p, p->return_ptr.addr);
+						rw_mutex_shared_lock(&p->module->values_mutex);
+						lbValue found = map_must_get(&p->module->values, e);
+						rw_mutex_shared_unlock(&p->module->values_mutex);
+						lb_emit_store(p, found, lb_emit_conv(p, res, e->type));
+					}
+					LLVMBuildRetVoid(p->builder);
+					return;
+				}
+			}
+
+			// NOTE(bill): @RVO for `x := call(); ...; return x`
+			if (p->sret_rvo_entity != nullptr) {
+				if (ret_expr->kind == Ast_Ident) {
+					Entity *ret_e = entity_of_node(ret_expr);
+					if (ret_e == p->sret_rvo_entity) {
+						lb_emit_defer_stmts(p, lbDeferExit_Return, nullptr, pos);
+						LLVMValueRef instr =
LLVMGetLastInstruction(p->curr_block->block);
+						if (!lb_is_instr_terminating(instr)) {
+							LLVMBuildRetVoid(p->builder);
+						}
+						return;
+					}
+				}
+			}
+		}
+
+
 		if (res_count == 0) {
 			rw_mutex_shared_lock(&p->module->values_mutex);
 			lbValue found = map_must_get(&p->module->values, e);
@@ -2865,6 +3028,23 @@ gb_internal void lb_build_assign_stmt_array(lbProcedure *p, TokenKind op, lbAddr
 }
 gb_internal void lb_build_assign_stmt(lbProcedure *p, AstAssignStmt *as) {
 	if (as->op.kind == Token_Eq) {
+		if (LB_ENABLE_RVO && as->lhs.count == 1 && as->rhs.count == 1) {
+			// @RVO for single assignments `x = call()`
+			// Only a plain identifier is tried as the destination, so that no lhs code with side effects
+			// is emitted here and then again by the general path below (which iterates `as->lhs` once more)
+			// NOTE(review): the call's arguments may still refer to `x` itself - confirm this aliasing is safe
+			Ast *lhs_expr = unparen_expr(as->lhs[0]);
+			Ast *rhs_expr = unparen_expr(as->rhs[0]);
+			if (lhs_expr->kind == Ast_Ident && !is_blank_ident(lhs_expr) && rhs_expr->kind == Ast_CallExpr) {
+				lbAddr lval = lb_build_addr(p, lhs_expr);
+				bool is_local = lval.kind == lbAddr_Default && LLVMIsAAllocaInst(lval.addr.value) != nullptr;
+				if (is_local && lb_call_sret_eligible(p, rhs_expr, lb_addr_type(lval))) {
+					lb_build_call_expr(p, rhs_expr, &lval.addr); // the callee writes its result straight into `x`
+					return;
+				}
+			}
+		}
+
 		auto lvals = array_make(permanent_allocator(), 0, as->lhs.count);
 
 		for (Ast *lhs : as->lhs) {
@@ -3050,6 +3230,27 @@ gb_internal void lb_build_stmt(lbProcedure *p, Ast *node) {
 				}
 			}
 		} else {
+			if (LB_ENABLE_RVO) {
+				// @RVO: for `x := call()`
+				if (vd->names.count == 1 && values.count == 1 && !is_blank_ident(vd->names[0])) {
+					Ast *rhs_expr = unparen_expr(values[0]);
+					Entity *e = entity_of_node(vd->names[0]);
+					if (rhs_expr->kind == Ast_CallExpr && e != nullptr && lb_call_sret_eligible(p, rhs_expr, e->type)) {
+						lbValue dst = {};
+						if (e == p->sret_rvo_entity) {
+							dst = p->return_ptr.addr; // `x` is registered directly on the procedure's return pointer
+							lb_add_entity(p->module, e, dst);
+							lb_add_debug_local_variable(p, dst.value, e->type, e->token);
+						} else {
+							lbAddr local = lb_add_local(p, e->type, e, true);
+							dst = local.addr;
+						}
+						lb_build_call_expr(p, rhs_expr, &dst);
+						break;
+					}
+				}
+			}
+
 			auto lvals_preused = slice_make(temporary_allocator(), vd->names.count);
 			auto lvals = slice_make(temporary_allocator(), vd->names.count);
 			auto inits = array_make(temporary_allocator(), 0, lvals.count);