diff --git a/core/prof/spall/doc.odin b/core/prof/spall/doc.odin
index 0f3cc8bb8..c34ba0d5b 100644
--- a/core/prof/spall/doc.odin
+++ b/core/prof/spall/doc.odin
@@ -1,26 +1,39 @@
 /*
-import "core:prof/spall"
+	import "base:runtime"
+	import "core:prof/spall"
 
-spall_ctx: spall.Context
-spall_buffer: spall.Buffer
+	spall_ctx: spall.Context
+	spall_buffer: spall.Buffer
 
-foo :: proc() {
-	spall.SCOPED_EVENT(&spall_ctx, &spall_buffer, #procedure)
-}
+	foo :: proc() {
+		spall.SCOPED_EVENT(&spall_ctx, &spall_buffer, #procedure)
+	}
 
-main :: proc() {
-	spall_ctx = spall.context_create("trace_test.spall")
-	defer spall.context_destroy(&spall_ctx)
+	main :: proc() {
+		spall_ctx = spall.context_create("trace_test.spall")
+		defer spall.context_destroy(&spall_ctx)
 
-	buffer_backing := make([]u8, spall.BUFFER_DEFAULT_SIZE)
-	spall_buffer = spall.buffer_create(buffer_backing)
-	defer spall.buffer_destroy(&spall_ctx, &spall_buffer)
+		buffer_backing := make([]u8, spall.BUFFER_DEFAULT_SIZE)
+		spall_buffer = spall.buffer_create(buffer_backing)
+		defer spall.buffer_destroy(&spall_ctx, &spall_buffer)
 
-	spall.SCOPED_EVENT(&spall_ctx, &spall_buffer, #procedure)
+		spall.SCOPED_EVENT(&spall_ctx, &spall_buffer, #procedure)
 
-	for i := 0; i < 9001; i += 1 {
-		foo()
-	}
-}
+		for i := 0; i < 9001; i += 1 {
+			foo()
+		}
+	}
+
+	// Automatic profiling of every procedure:
+
+	@(instrumentation_enter)
+	spall_enter :: proc "contextless" (proc_address, call_site_return_address: rawptr, loc: runtime.Source_Code_Location) {
+		spall._buffer_begin(&spall_ctx, &spall_buffer, "", "", loc)
+	}
+
+	@(instrumentation_exit)
+	spall_exit :: proc "contextless" (proc_address, call_site_return_address: rawptr, loc: runtime.Source_Code_Location) {
+		spall._buffer_end(&spall_ctx, &spall_buffer)
+	}
 */
 package spall
diff --git a/core/prof/spall/spall.odin b/core/prof/spall/spall.odin
index 7e4334c2c..a6fc59e74 100644
--- a/core/prof/spall/spall.odin
+++ b/core/prof/spall/spall.odin
@@ -3,7 +3,6 @@ package spall
 import "core:os"
 import 
"core:time" import "base:intrinsics" -import "core:mem" // File Format @@ -111,9 +110,10 @@ buffer_create :: proc(data: []byte, tid: u32 = 0, pid: u32 = 0) -> (buffer: Buff return } -buffer_flush :: proc(ctx: ^Context, buffer: ^Buffer) { +@(no_instrumentation) +buffer_flush :: proc "contextless" (ctx: ^Context, buffer: ^Buffer) #no_bounds_check /* bounds check would segfault instrumentation */ { start := _trace_now(ctx) - os.write(ctx.fd, buffer.data[:buffer.head]) + write(ctx.fd, buffer.data[:buffer.head]) buffer.head = 0 end := _trace_now(ctx) @@ -140,15 +140,16 @@ _scoped_buffer_end :: proc(ctx: ^Context, buffer: ^Buffer, _, _: string, _ := #c _buffer_end(ctx, buffer) } - +@(no_instrumentation) _trace_now :: proc "contextless" (ctx: ^Context) -> f64 { if !ctx.precise_time { - return f64(time.tick_now()._nsec) / 1_000 + return f64(tick_now()) / 1_000 } return f64(intrinsics.read_cycle_counter()) } +@(no_instrumentation) _build_header :: proc "contextless" (buffer: []u8, timestamp_scale: f64) -> (header_size: int, ok: bool) #optional_ok { header_size = size_of(Manual_Header) if header_size > len(buffer) { @@ -164,7 +165,8 @@ _build_header :: proc "contextless" (buffer: []u8, timestamp_scale: f64) -> (hea return } -_build_begin :: proc "contextless" (buffer: []u8, name: string, args: string, ts: f64, tid: u32, pid: u32) -> (event_size: int, ok: bool) #optional_ok { +@(no_instrumentation) +_build_begin :: #force_inline proc "contextless" (buffer: []u8, name: string, args: string, ts: f64, tid: u32, pid: u32) -> (event_size: int, ok: bool) #optional_ok #no_bounds_check /* bounds check would segfault instrumentation */ { ev := (^Begin_Event)(raw_data(buffer)) name_len := min(len(name), 255) args_len := min(len(args), 255) @@ -180,13 +182,14 @@ _build_begin :: proc "contextless" (buffer: []u8, name: string, args: string, ts ev.ts = f64le(ts) ev.name_len = u8(name_len) ev.args_len = u8(args_len) - mem.copy(raw_data(buffer[size_of(Begin_Event):]), raw_data(name), 
name_len) - mem.copy(raw_data(buffer[size_of(Begin_Event)+name_len:]), raw_data(args), args_len) + intrinsics.mem_copy_non_overlapping(raw_data(buffer[size_of(Begin_Event):]), raw_data(name), name_len) + intrinsics.mem_copy_non_overlapping(raw_data(buffer[size_of(Begin_Event)+name_len:]), raw_data(args), args_len) ok = true return } +@(no_instrumentation) _build_end :: proc "contextless" (buffer: []u8, ts: f64, tid: u32, pid: u32) -> (event_size: int, ok: bool) #optional_ok { ev := (^End_Event)(raw_data(buffer)) event_size = size_of(End_Event) @@ -203,7 +206,8 @@ _build_end :: proc "contextless" (buffer: []u8, ts: f64, tid: u32, pid: u32) -> return } -_buffer_begin :: proc(ctx: ^Context, buffer: ^Buffer, name: string, args: string = "", location := #caller_location) { +@(no_instrumentation) +_buffer_begin :: proc "contextless" (ctx: ^Context, buffer: ^Buffer, name: string, args: string = "", location := #caller_location) #no_bounds_check /* bounds check would segfault instrumentation */ { if buffer.head + BEGIN_EVENT_MAX > len(buffer.data) { buffer_flush(ctx, buffer) } @@ -211,7 +215,8 @@ _buffer_begin :: proc(ctx: ^Context, buffer: ^Buffer, name: string, args: string buffer.head += _build_begin(buffer.data[buffer.head:], name, args, _trace_now(ctx), buffer.tid, buffer.pid) } -_buffer_end :: proc(ctx: ^Context, buffer: ^Buffer) { +@(no_instrumentation) +_buffer_end :: proc "contextless" (ctx: ^Context, buffer: ^Buffer) #no_bounds_check /* bounds check would segfault instrumentation */ { ts := _trace_now(ctx) if buffer.head + size_of(End_Event) > len(buffer.data) { @@ -220,3 +225,13 @@ _buffer_end :: proc(ctx: ^Context, buffer: ^Buffer) { buffer.head += _build_end(buffer.data[buffer.head:], ts, buffer.tid, buffer.pid) } + +@(no_instrumentation) +write :: proc "contextless" (fd: os.Handle, buf: []byte) -> (n: int, err: os.Errno) { + return _write(fd, buf) +} + +@(no_instrumentation) +tick_now :: proc "contextless" () -> (ns: i64) { + return _tick_now() +} diff --git 
a/core/prof/spall/spall_linux.odin b/core/prof/spall/spall_linux.odin
new file mode 100644
index 000000000..3f475c5e0
--- /dev/null
+++ b/core/prof/spall/spall_linux.odin
@@ -0,0 +1,41 @@
+//+private
+package spall
+
+// Only for types and constants.
+import "core:os"
+
+// Package is `//+no-instrumentation`, safe to use.
+import "core:sys/linux"
+
+// Upper bound on the number of bytes handed to a single write call.
+MAX_RW :: 0x7fffffff
+
+// Writes `data` to `fd` in chunks of at most MAX_RW bytes until everything is written or a write fails.
+// Returns the number of bytes written so far and, on failure, the error of the failing write.
+@(no_instrumentation)
+_write :: proc "contextless" (fd: os.Handle, data: []byte) -> (n: int, err: os.Errno) #no_bounds_check /* bounds check would segfault instrumentation */ {
+	if len(data) == 0 {
+		return 0, os.ERROR_NONE
+	}
+
+	for n < len(data) {
+		// Continue after the `n` bytes already written; a short write must not resend the start of `data`.
+		chunk := data[n:][:min(len(data)-n, MAX_RW)]
+		written, errno := linux.write(linux.Fd(fd), chunk)
+		if errno != .NONE {
+			return n, os.Errno(errno)
+		}
+		n += written
+	}
+
+	return n, os.ERROR_NONE
+}
+
+CLOCK_MONOTONIC_RAW :: 4 // NOTE(tetra): "RAW" means: Not adjusted by NTP.
+
+// Returns the `.MONOTONIC_RAW` clock reading converted to nanoseconds; the clock_gettime error is ignored.
+@(no_instrumentation)
+_tick_now :: proc "contextless" () -> (ns: i64) {
+	t, _ := linux.clock_gettime(.MONOTONIC_RAW)
+	return i64(t.time_sec)*1e9 + i64(t.time_nsec)
+}
diff --git a/core/prof/spall/spall_unix.odin b/core/prof/spall/spall_unix.odin
new file mode 100644
index 000000000..7915f8c32
--- /dev/null
+++ b/core/prof/spall/spall_unix.odin
@@ -0,0 +1,57 @@
+//+private
+//+build darwin, freebsd, openbsd
+package spall
+
+// Only for types.
+import "core:os"
+
+when ODIN_OS == .Darwin {
+	foreign import libc "system:System.framework"
+} else {
+	foreign import libc "system:c"
+}
+
+timespec :: struct {
+	tv_sec: i64, // seconds
+	tv_nsec: i64, // nanoseconds
+}
+
+foreign libc {
+	__error :: proc() -> ^i32 --- // NOTE(review): presumably the errno accessor; confirm the symbol name on every target in the build tag (OpenBSD may export `__errno` instead)
+	@(link_name="write") _unix_write :: proc(handle: os.Handle, buffer: rawptr, count: uint) -> int ---
+	@(link_name="clock_gettime") _unix_clock_gettime :: proc(clock_id: u64, timespec: ^timespec) -> i32 ---
+}
+
+@(no_instrumentation) // Reads the value `__error()` points at and returns it as an int.
+get_last_error :: proc "contextless" () -> int {
+	return int(__error()^)
+}
+
+MAX_RW :: 0x7fffffff // upper bound on the number of bytes handed to a single write call
+
+@(no_instrumentation) // Writes `data` to `fd` in chunks of at most MAX_RW bytes; returns the bytes written so far and the error of a failing write.
+_write :: proc "contextless" (fd: os.Handle, data: []byte) -> (n: int, err: os.Errno) #no_bounds_check /* bounds check would segfault instrumentation */ {
+	if len(data) == 0 {
+		return 0, os.ERROR_NONE
+	}
+
+	for n < len(data) {
+		chunk := data[n:][:min(len(data)-n, MAX_RW)] // continue after the `n` bytes already written instead of resending the start of `data`
+		written := _unix_write(fd, raw_data(chunk), len(chunk))
+		if written < 0 {
+			return n, os.Errno(get_last_error())
+		}
+		n += written
+	}
+
+	return n, os.ERROR_NONE
+}
+
+CLOCK_MONOTONIC_RAW :: 4 // NOTE(tetra): "RAW" means: Not adjusted by NTP. NOTE(review): confirm that clock id 4 means the same clock on darwin, freebsd and openbsd.
+
+@(no_instrumentation) // Returns the clock reading converted to nanoseconds; the clock_gettime return code is ignored.
+_tick_now :: proc "contextless" () -> (ns: i64) {
+	t: timespec
+	_unix_clock_gettime(CLOCK_MONOTONIC_RAW, &t)
+	return t.tv_sec*1e9 + t.tv_nsec
+}
diff --git a/core/prof/spall/spall_windows.odin b/core/prof/spall/spall_windows.odin
new file mode 100644
index 000000000..4d96c111a
--- /dev/null
+++ b/core/prof/spall/spall_windows.odin
@@ -0,0 +1,54 @@
+//+private
+package spall
+
+// Only for types.
+import "core:os"
+
+// Package is `//+no-instrumentation`, safe to use.
+import win32 "core:sys/windows"
+
+MAX_RW :: 1<<30 // upper bound on the number of bytes requested from a single WriteFile call
+
+@(no_instrumentation) // Writes `data` to `fd` in chunks of at most MAX_RW bytes; returns the bytes written so far and the error of a failing write.
+_write :: proc "contextless" (fd: os.Handle, data: []byte) -> (int, os.Errno) #no_bounds_check /* bounds check would segfault instrumentation */ {
+	if len(data) == 0 {
+		return 0, os.ERROR_NONE
+	}
+
+	single_write_length: win32.DWORD
+	total_write: i64
+	length := i64(len(data))
+
+	for total_write < length {
+		remaining := length - total_write
+		to_write := win32.DWORD(min(remaining, MAX_RW)) // clamp while still i64: narrowing to i32 first wraps once `remaining` exceeds max(i32)
+
+		e := win32.WriteFile(win32.HANDLE(fd), &data[total_write], to_write, &single_write_length, nil)
+		if single_write_length <= 0 || !e {
+			err := os.Errno(win32.GetLastError())
+			return int(total_write), err
+		}
+		total_write += i64(single_write_length)
+	}
+	return int(total_write), os.ERROR_NONE
+}
+
+@(no_instrumentation) // Returns the performance counter reading scaled to nanoseconds using the queried counter frequency.
+_tick_now :: proc "contextless" () -> (ns: i64) {
+	@(no_instrumentation)
+	mul_div_u64 :: #force_inline proc "contextless" (val, num, den: i64) -> i64 {
+		q := val / den // whole multiples of `den`, scaled separately from the remainder
+		r := val % den
+		return q * num + r * num / den
+	}
+
+	@thread_local qpc_frequency: win32.LARGE_INTEGER
+
+	if qpc_frequency == 0 { // query the frequency only while the thread-local copy is still zero
+		win32.QueryPerformanceFrequency(&qpc_frequency)
+	}
+	now: win32.LARGE_INTEGER
+	win32.QueryPerformanceCounter(&now)
+
+	return mul_div_u64(i64(now), 1e9, i64(qpc_frequency))
+}
diff --git a/core/sys/linux/helpers.odin b/core/sys/linux/helpers.odin
index 9bee57c11..69c648bf1 100644
--- a/core/sys/linux/helpers.odin
+++ b/core/sys/linux/helpers.odin
@@ -1,4 +1,5 @@
 //+build linux
+//+no-instrumentation
 package linux
 
 import "base:intrinsics"
diff --git a/core/sys/linux/sys.odin b/core/sys/linux/sys.odin
index 8a93f0a47..869ce88e3 100644
--- a/core/sys/linux/sys.odin
+++ b/core/sys/linux/sys.odin
@@ -1,3 +1,4 @@
+//+no-instrumentation
 package linux
 
 import "base:intrinsics"
@@ -2383,7 +2384,11 @@ timer_delete :: proc "contextless" (timer: Timer) -> (Errno) {
 
 // TODO(flysand): clock_settime
 
-// TODO(flysand): clock_gettime
+clock_gettime :: proc "contextless" (clock: Clock_Id) -> (ts: Time_Spec, err: Errno) {
+	ret := syscall(SYS_clock_gettime, clock, &ts)
+	err = Errno(-ret)
+	return
+}
 
 // TODO(flysand): clock_getres
 
diff --git a/vendor/OpenGL/helpers.odin b/vendor/OpenGL/helpers.odin
index a7432db95..0bd8e451b 100644
--- a/vendor/OpenGL/helpers.odin
+++ b/vendor/OpenGL/helpers.odin
@@ -120,7 +120,6 @@ when GL_DEBUG {
 }
 
 // Compiling shaders are identical for any shader (vertex, geometry, fragment, tesselation, (maybe compute too))
-@private
 compile_shader_from_source :: proc(shader_data: string, shader_type: Shader_Type) -> (shader_id: u32, ok: bool) {
 	shader_id = CreateShader(cast(u32)shader_type)
 	length := i32(len(shader_data))
@@ -134,7 +133,6 @@ compile_shader_from_source :: proc(shader_data: string, shader_type: Shader_Type
 }
 
 // only used once, but I'd just make a subprocedure(?) for consistency
-@private
 create_and_link_program :: proc(shader_ids: []u32, binary_retrievable := false) -> (program_id: u32, ok: bool) {
 	program_id = CreateProgram()
 	for id in shader_ids {