Begin to try to minimize thread contention in the LLVM section

This commit is contained in:
gingerBill
2025-09-12 16:27:33 +01:00
parent cf4262d22c
commit dc5044052f
6 changed files with 197 additions and 29 deletions

View File

@@ -554,6 +554,7 @@ struct BuildContext {
bool internal_no_inline;
bool internal_by_value;
bool internal_ignore_verification;
bool no_threaded_checker;

View File

@@ -8,7 +8,7 @@
#endif
#ifndef LLVM_IGNORE_VERIFICATION
#define LLVM_IGNORE_VERIFICATION 0
#define LLVM_IGNORE_VERIFICATION build_context.internal_ignore_verification
#endif
@@ -1922,22 +1922,24 @@ gb_internal WORKER_TASK_PROC(lb_llvm_module_verification_worker_proc) {
}
// Argument bundle handed to lb_create_startup_runtime_worker_proc through its
// untyped task `data` pointer. Populated by lb_create_startup_runtime.
struct lbCreateStartupWorkerData {
lbModule *main_module; // module the startup procedure is generated in
lbProcedure *p; // the already-created startup procedure whose body the worker builds
lbProcedure *objc_names; // may be null; when set, the worker emits a call to it
Array<lbGlobalVariable> *global_variables; // borrowed pointer to the caller's array (not copied)
};
gb_internal lbProcedure *lb_create_startup_runtime(lbModule *main_module, lbProcedure *objc_names, Array<lbGlobalVariable> &global_variables) { // Startup Runtime
Type *proc_type = alloc_type_proc(nullptr, nullptr, 0, nullptr, 0, false, ProcCC_Odin);
gb_internal WORKER_TASK_PROC(lb_create_startup_runtime_worker_proc) {
lbCreateStartupWorkerData *wd = cast(lbCreateStartupWorkerData *)data;
lbProcedure *p = lb_create_dummy_procedure(main_module, str_lit(LB_STARTUP_RUNTIME_PROC_NAME), proc_type);
p->is_startup = true;
lb_add_attribute_to_proc(p->module, p->value, "optnone");
lb_add_attribute_to_proc(p->module, p->value, "noinline");
// Make sure shared libraries call their own runtime startup on Linux.
LLVMSetVisibility(p->value, LLVMHiddenVisibility);
LLVMSetLinkage(p->value, LLVMWeakAnyLinkage);
lbModule *main_module = wd->main_module;
lbProcedure *p = wd->p;
lbProcedure *objc_names = wd->objc_names;
Array<lbGlobalVariable> &global_variables = *wd->global_variables;
lb_begin_procedure_body(p);
lb_setup_type_info_data(main_module);
lb_setup_type_info_data(p->module);
if (objc_names) {
LLVMBuildCall2(p->builder, lb_type_internal_for_procedures_raw(main_module, objc_names->type), objc_names->value, nullptr, 0, "");
@@ -2023,7 +2025,7 @@ gb_internal lbProcedure *lb_create_startup_runtime(lbModule *main_module, lbProc
}
CheckerInfo *info = main_module->gen->info;
for (Entity *e : info->init_procedures) {
lbValue value = lb_find_procedure_value_from_entity(main_module, e);
lb_emit_call(p, value, {}, ProcInlining_none);
@@ -2033,6 +2035,34 @@ gb_internal lbProcedure *lb_create_startup_runtime(lbModule *main_module, lbProc
lb_end_procedure_body(p);
lb_verify_function(main_module, p);
return 0;
}
// Creates the runtime startup procedure inside `main_module` and builds its body.
//
// The procedure value itself (attributes, visibility, linkage) is set up here; the body
// is emitted by lb_create_startup_runtime_worker_proc, which receives its inputs through
// a permanently allocated lbCreateStartupWorkerData. The worker is currently invoked
// directly on the calling thread.
//
// main_module      - module that owns the startup procedure
// objc_names       - forwarded to the worker; may be null
// global_variables - forwarded to the worker by pointer (not copied)
//
// Returns the created startup procedure.
gb_internal lbProcedure *lb_create_startup_runtime(lbModule *main_module, lbProcedure *objc_names, Array<lbGlobalVariable> *global_variables) { // Startup Runtime
	Type *startup_type = alloc_type_proc(nullptr, nullptr, 0, nullptr, 0, false, ProcCC_Odin);

	lbProcedure *startup = lb_create_dummy_procedure(main_module, str_lit(LB_STARTUP_RUNTIME_PROC_NAME), startup_type);
	startup->is_startup = true;

	lb_add_attribute_to_proc(startup->module, startup->value, "optnone");
	lb_add_attribute_to_proc(startup->module, startup->value, "noinline");

	// Make sure shared libraries call their own runtime startup on Linux.
	LLVMSetVisibility(startup->value, LLVMHiddenVisibility);
	LLVMSetLinkage(startup->value, LLVMWeakAnyLinkage);

	lbCreateStartupWorkerData *worker_data = permanent_alloc_item<lbCreateStartupWorkerData>();
	worker_data->main_module      = main_module;
	worker_data->p                = startup;
	worker_data->objc_names       = objc_names;
	worker_data->global_variables = global_variables;

	// TODO(bill): determine whether or not this is better multithreaded or not
	lb_create_startup_runtime_worker_proc(worker_data);

	return startup;
}
@@ -2389,14 +2419,18 @@ gb_internal WORKER_TASK_PROC(lb_llvm_module_pass_worker_proc) {
return 0;
}
gb_internal WORKER_TASK_PROC(lb_generate_procedures_worker_proc) {
// Worker task: generates every procedure queued in one module's `procedures_to_generate`
// and records a module timing around the whole pass.
// `data` is the lbModule * to process. Always returns 0.
gb_internal WORKER_TASK_PROC(lb_generate_procedures_in_a_module_worker_proc) {
	lbModule *mod = cast(lbModule *)data;
	lbModuleTiming timing = lb_module_timing_start(mod);

	// The count is re-read before every iteration rather than cached up front.
	isize index = 0;
	while (index < mod->procedures_to_generate.count) {
		lbProcedure *proc = mod->procedures_to_generate[index];
		lb_generate_procedure(proc->module, proc);
		index += 1;
	}

	lb_module_timing_end(&timing);
	return 0;
}
@@ -2404,25 +2438,32 @@ gb_internal void lb_generate_procedures(lbGenerator *gen, bool do_threading) {
if (do_threading) {
for (auto const &entry : gen->modules) {
lbModule *m = entry.value;
thread_pool_add_task(lb_generate_procedures_worker_proc, m);
thread_pool_add_task(lb_generate_procedures_in_a_module_worker_proc, m);
}
thread_pool_wait();
} else {
for (auto const &entry : gen->modules) {
lbModule *m = entry.value;
lb_generate_procedures_worker_proc(m);
lb_generate_procedures_in_a_module_worker_proc(m);
}
}
}
// Worker task: generates every procedure in one module's `missing_procedures_to_check`
// and records a module timing flagged with `is_from_missing`.
// `data` is the lbModule * to process. Always returns 0.
gb_internal WORKER_TASK_PROC(lb_generate_missing_procedures_to_check_worker_proc) {
	lbModule *mod = cast(lbModule *)data;

	lbModuleTiming timing = lb_module_timing_start(mod);
	timing.is_from_missing = true;

	// The count is re-read before every iteration rather than cached up front.
	isize index = 0;
	while (index < mod->missing_procedures_to_check.count) {
		lbProcedure *proc = mod->missing_procedures_to_check[index];
		debugf("Generate missing procedure: %.*s module %p\n", LIT(proc->name), mod);
		lb_generate_procedure(mod, proc);
		index += 1;
	}

	lb_module_timing_end(&timing);
	return 0;
}
@@ -3333,7 +3374,7 @@ gb_internal bool lb_generate_code(lbGenerator *gen) {
gen->objc_names = lb_create_objc_names(default_module);
TIME_SECTION("LLVM Runtime Startup Creation (Global Variables & @(init))");
gen->startup_runtime = lb_create_startup_runtime(default_module, gen->objc_names, global_variables);
gen->startup_runtime = lb_create_startup_runtime(default_module, gen->objc_names, &global_variables);
TIME_SECTION("LLVM Runtime Cleanup Creation & @(fini)");
gen->cleanup_runtime = lb_create_cleanup_runtime(default_module);
@@ -3532,6 +3573,78 @@ gb_internal bool lb_generate_code(lbGenerator *gen) {
label_object_generation = gb_string_append_fmt(label_object_generation, " (%td used modules)", gen->used_module_count);
}
TIME_SECTION_WITH_LEN(label_object_generation, gb_string_length(label_object_generation));
// if (true) {
// Array<lbProcedureTiming> timings = {};
// array_init(&timings, heap_allocator(), 0, gen->procedure_timings.count);
// defer (array_free(&timings));
// for (lbProcedureTiming pt = {}; mpsc_dequeue(&gen->procedure_timings, &pt); /**/) {
// array_add(&timings, pt);
// }
// array_sort(timings, [](void const *a, void const *b) -> int {
// lbProcedureTiming const *x = cast(lbProcedureTiming *)a;
// lbProcedureTiming const *y = cast(lbProcedureTiming *)b;
// i64 t_x = cast(i64)(x->end - x->start);
// i64 t_y = cast(i64)(y->end - y->start);
// if (t_x < t_y) {
// return +1;
// } else if (t_x > t_y) {
// return -1;
// }
// return 0;
// });
// isize count = 0;
// for (lbProcedureTiming const &pt : timings) {
// if (count++ >= 20) {
// break;
// }
// f64 t_us = cast(f64)(pt.end - pt.start) / 1.0e3;
// f64 v_us = cast(f64)(pt.after_verify - pt.end) / 1.0e3;
// gb_printf_err("%.*s %.3f us %.3f us\n", LIT(pt.p->name), t_us, v_us);
// }
// return false;
// }
// Debug-only diagnostic: drains the per-module timings queue, sorts the entries by
// elapsed time (longest first) and prints one line per module to stderr.
//
// NOTE: this block ends with `return false`, which aborts code generation before any
// object file is produced. It must therefore stay disabled in normal builds; with the
// guard left as `true` every compilation stopped here. Flip the condition locally
// only while profiling.
if (false) {
	Array<lbModuleTiming> timings = {};
	array_init(&timings, heap_allocator(), 0, gen->module_timings.count);
	defer (array_free(&timings));

	// Move everything out of the queue into a sortable array.
	for (lbModuleTiming mt = {}; mpsc_dequeue(&gen->module_timings, &mt); /**/) {
		array_add(&timings, mt);
	}

	// Descending by duration: a shorter entry compares as "greater" so it sorts later.
	array_sort(timings, [](void const *a, void const *b) -> int {
		lbModuleTiming const *x = cast(lbModuleTiming *)a;
		lbModuleTiming const *y = cast(lbModuleTiming *)b;

		i64 t_x = cast(i64)(x->end - x->start);
		i64 t_y = cast(i64)(y->end - y->start);

		if (t_x < t_y) {
			return +1;
		} else if (t_x > t_y) {
			return -1;
		}
		return 0;
	});

	for (lbModuleTiming const &mt : timings) {
		// NOTE(review): the "us" label assumes time_stamp_time_now() is in nanoseconds - confirm.
		f64 t = cast(f64)(mt.end - mt.start) / 1.0e3;
		gb_printf_err("%s %.3f us - procedures %u\n", mt.m->module_name, t, mt.m->procedures.count);
	}

	// Intentional early exit while profiling: skips object generation entirely.
	return false;
}
if (build_context.ignore_llvm_build) {
gb_printf_err("LLVM object generation has been ignored!\n");

View File

@@ -150,6 +150,7 @@ struct lbModule {
struct lbGenerator *gen;
LLVMTargetMachineRef target_machine;
Checker *checker;
CheckerInfo *info;
AstPackage *pkg; // possibly associated
AstFile *file; // possibly associated
@@ -225,6 +226,19 @@ struct lbObjCGlobal {
Type * class_impl_type; // This is set when the class has the objc_implement attribute set to true.
};
// One timing record for a code generation pass over a single module.
// Created by lb_module_timing_start and completed/queued by lb_module_timing_end.
struct lbModuleTiming {
lbModule *m; // module that was timed
u64 start; // time_stamp_time_now() value captured when timing began
u64 end; // time_stamp_time_now() value captured when timing finished
bool is_from_missing; // set to true by the "missing procedures to check" worker; otherwise left false
};
gb_internal lbModuleTiming lb_module_timing_start(lbModule *m);
gb_internal void lb_module_timing_end(lbModuleTiming *mt);
struct lbGenerator : LinkerData {
CheckerInfo *info;
@@ -233,7 +247,7 @@ struct lbGenerator : LinkerData {
lbModule default_module;
RecursiveMutex anonymous_proc_lits_mutex;
PtrMap<Ast *, lbProcedure *> anonymous_proc_lits;
PtrMap<Ast *, lbProcedure *> anonymous_proc_lits; // TODO(bill): Why does this have to be "global"?
isize used_module_count;
@@ -246,6 +260,8 @@ struct lbGenerator : LinkerData {
MPSCQueue<lbObjCGlobal> objc_classes;
MPSCQueue<lbObjCGlobal> objc_ivars;
MPSCQueue<String> raddebug_section_strings;
MPSCQueue<lbModuleTiming> module_timings;
};

View File

@@ -115,6 +115,13 @@ gb_internal void lb_init_module(lbModule *m, Checker *c) {
}
// Worker task wrapper around lb_init_module.
// `data` is the lbModule * to initialise; its `checker` field is passed through as the
// Checker argument, so it has to be set before the task runs. Always returns 0.
gb_internal WORKER_TASK_PROC(lb_init_module_worker_proc) {
	lbModule *mod = cast(lbModule *)data;
	Checker *checker = mod->checker;
	lb_init_module(mod, checker);
	return 0;
}
gb_internal bool lb_init_generator(lbGenerator *gen, Checker *c) {
if (global_error_collector.count != 0) {
return false;
@@ -134,19 +141,26 @@ gb_internal bool lb_init_generator(lbGenerator *gen, Checker *c) {
gen->info = &c->info;
map_init(&gen->modules, gen->info->packages.count*2);
map_init(&gen->modules_through_ctx, gen->info->packages.count*2);
bool module_per_file = build_context.module_per_file && build_context.optimization_level <= 0;
isize module_capacity = 0;
module_capacity += gen->info->packages.count*2;
module_capacity += gen->info->files.count*2;
map_init(&gen->modules, module_capacity);
map_init(&gen->modules_through_ctx, module_capacity);
map_init(&gen->anonymous_proc_lits, 1024);
if (USE_SEPARATE_MODULES) {
bool module_per_file = build_context.module_per_file && build_context.optimization_level <= 0;
for (auto const &entry : gen->info->packages) {
AstPackage *pkg = entry.value;
auto m = gb_alloc_item(permanent_allocator(), lbModule);
m->checker = c;
m->pkg = pkg;
m->gen = gen;
map_set(&gen->modules, cast(void *)pkg, m);
lb_init_module(m, c);
if (!module_per_file) {
continue;
}
@@ -162,6 +176,8 @@ gb_internal bool lb_init_generator(lbGenerator *gen, Checker *c) {
}
}
thread_pool_wait();
gen->default_module.gen = gen;
map_set(&gen->modules, cast(void *)1, &gen->default_module);
lb_init_module(&gen->default_module, c);
@@ -178,11 +194,26 @@ gb_internal bool lb_init_generator(lbGenerator *gen, Checker *c) {
mpsc_init(&gen->objc_ivars, heap_allocator());
mpsc_init(&gen->raddebug_section_strings, heap_allocator());
mpsc_init(&gen->module_timings, heap_allocator());
return true;
}
// Begins a timing record for module `m`: returns a zero-initialised lbModuleTiming with
// the module pointer and the current time stamp filled in. `end` stays 0 until
// lb_module_timing_end is called on the record.
gb_internal lbModuleTiming lb_module_timing_start(lbModule *m) {
	lbModuleTiming timing = {};
	timing.m     = m;
	timing.start = time_stamp_time_now();
	return timing;
}
// Finishes a timing record: stamps the end time and enqueues a copy of the record on
// the owning generator's `module_timings` queue.
gb_internal void lb_module_timing_end(lbModuleTiming *mt) {
	mt->end = time_stamp_time_now();

	lbGenerator *gen = mt->m->gen;
	mpsc_enqueue(&gen->module_timings, *mt);
}
gb_internal lbValue lb_global_type_info_data_ptr(lbModule *m) {
lbValue v = lb_find_value_from_entity(m, lb_global_type_info_data_entity);
return v;

View File

@@ -1256,21 +1256,21 @@ gb_internal lbValue lb_emit_call(lbProcedure *p, lbValue value, Array<lbValue> c
by_ptr = true;
/*fallthrough*/
case DeferredProcedure_in:
result_as_args = array_clone(heap_allocator(), in_args);
result_as_args = array_clone(permanent_allocator(), in_args);
break;
case DeferredProcedure_out_by_ptr:
by_ptr = true;
/*fallthrough*/
case DeferredProcedure_out:
result_as_args = lb_value_to_array(p, heap_allocator(), result);
result_as_args = lb_value_to_array(p, permanent_allocator(), result);
break;
case DeferredProcedure_in_out_by_ptr:
by_ptr = true;
/*fallthrough*/
case DeferredProcedure_in_out:
{
auto out_args = lb_value_to_array(p, heap_allocator(), result);
array_init(&result_as_args, heap_allocator(), in_args.count + out_args.count);
auto out_args = lb_value_to_array(p, permanent_allocator(), result);
array_init(&result_as_args, permanent_allocator(), in_args.count + out_args.count);
array_copy(&result_as_args, in_args, 0);
array_copy(&result_as_args, out_args, in_args.count);
}
@@ -4228,10 +4228,11 @@ gb_internal lbValue lb_build_call_expr_internal(lbProcedure *p, Ast *expr) {
variadic_args = lb_build_expr(p, variadic[0]);
variadic_args = lb_emit_conv(p, variadic_args, slice_type);
} else {
TEMPORARY_ALLOCATOR_GUARD();
Type *elem_type = slice_type->Slice.elem;
auto var_args = array_make<lbValue>(heap_allocator(), 0, variadic.count);
defer (array_free(&var_args));
auto var_args = array_make<lbValue>(temporary_allocator(), 0, variadic.count);
for (Ast *var_arg : variadic) {
lbValue v = lb_build_expr(p, var_arg);
lb_add_values_to_array(p, &var_args, v);

View File

@@ -403,6 +403,7 @@ enum BuildFlagKind {
BuildFlag_InternalCached,
BuildFlag_InternalNoInline,
BuildFlag_InternalByValue,
BuildFlag_InternalIgnoreVerification,
BuildFlag_Tilde,
@@ -626,6 +627,7 @@ gb_internal bool parse_build_flags(Array<String> args) {
add_flag(&build_flags, BuildFlag_InternalCached, str_lit("internal-cached"), BuildFlagParam_None, Command_all);
add_flag(&build_flags, BuildFlag_InternalNoInline, str_lit("internal-no-inline"), BuildFlagParam_None, Command_all);
add_flag(&build_flags, BuildFlag_InternalByValue, str_lit("internal-by-value"), BuildFlagParam_None, Command_all);
add_flag(&build_flags, BuildFlag_InternalIgnoreVerification, str_lit("internal-ignore-verification"), BuildFlagParam_None, Command_all);
#if ALLOW_TILDE
add_flag(&build_flags, BuildFlag_Tilde, str_lit("tilde"), BuildFlagParam_None, Command__does_build);
@@ -1584,6 +1586,10 @@ gb_internal bool parse_build_flags(Array<String> args) {
case BuildFlag_InternalByValue:
build_context.internal_by_value = true;
break;
case BuildFlag_InternalIgnoreVerification:
build_context.internal_ignore_verification = true;
break;
case BuildFlag_Tilde:
build_context.tilde_backend = true;