From 1708cb556a673b237facb86328819849dfb35007 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Tue, 7 Oct 2025 09:10:34 +0100 Subject: [PATCH] Improve docs for `core:bufio`; Rename `scanner_scan` -> `scan` (keep alias of old name) --- core/bufio/reader.odin | 4 ++++ core/bufio/scanner.odin | 28 +++++++++++++++++++++------- core/bufio/writer.odin | 2 ++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/core/bufio/reader.odin b/core/bufio/reader.odin index b78cac6e1..4aa1e6c5d 100644 --- a/core/bufio/reader.odin +++ b/core/bufio/reader.odin @@ -29,6 +29,7 @@ MIN_READ_BUFFER_SIZE :: 16 @(private) DEFAULT_MAX_CONSECUTIVE_EMPTY_READS :: 128 +// reader_init initializes using an `allocator` reader_init :: proc(b: ^Reader, rd: io.Reader, size: int = DEFAULT_BUF_SIZE, allocator := context.allocator, loc := #caller_location) { size := size size = max(size, MIN_READ_BUFFER_SIZE) @@ -37,6 +38,7 @@ reader_init :: proc(b: ^Reader, rd: io.Reader, size: int = DEFAULT_BUF_SIZE, all b.buf = make([]byte, size, allocator, loc) } +// reader_init_with_buf initializes using a user provided bytes buffer `buf` reader_init_with_buf :: proc(b: ^Reader, rd: io.Reader, buf: []byte) { reader_reset(b, rd) b.buf_allocator = {} @@ -49,10 +51,12 @@ reader_destroy :: proc(b: ^Reader) { b^ = {} } +// reader_size returns the number of bytes in the backing buffer reader_size :: proc(b: ^Reader) -> int { return len(b.buf) } +// reader_reset resets the read and write positions, and the error values reader_reset :: proc(b: ^Reader, r: io.Reader) { b.rd = r b.r, b.w = 0, 0 diff --git a/core/bufio/scanner.odin b/core/bufio/scanner.odin index ee2d5d1f6..27d29c685 100644 --- a/core/bufio/scanner.odin +++ b/core/bufio/scanner.odin @@ -46,6 +46,7 @@ DEFAULT_MAX_SCAN_TOKEN_SIZE :: 1<<16 @(private) _INIT_BUF_SIZE :: 4096 +// Initializes a Scanner buffer using an allocator `buf_allocator` scanner_init :: proc(s: ^Scanner, r: io.Reader, buf_allocator := context.allocator) -> ^Scanner { s.r = r s.split = scan_lines @@ 
-53,6 +54,8 @@ scanner_init :: proc(s: ^Scanner, r: io.Reader, buf_allocator := context.allocat s.buf.allocator = buf_allocator return s } + +// Initializes a Scanner with a user provided bytes buffer `buf` scanner_init_with_buffer :: proc(s: ^Scanner, r: io.Reader, buf: []byte) -> ^Scanner { s.r = r s.split = scan_lines @@ -75,24 +78,27 @@ scanner_error :: proc(s: ^Scanner) -> Scanner_Error { return s._err } -// Returns the most recent token created by scanner_scan. +// Returns the most recent token created by 'scan'. // The underlying array may point to data that may be overwritten -// by another call to scanner_scan. +// by another call to 'scan'. // Treat the returned value as if it is immutable. scanner_bytes :: proc(s: ^Scanner) -> []byte { return s.token } -// Returns the most recent token created by scanner_scan. +// Returns the most recent token created by 'scan'. // The underlying array may point to data that may be overwritten -// by another call to scanner_scan. +// by another call to 'scan'. // Treat the returned value as if it is immutable. scanner_text :: proc(s: ^Scanner) -> string { return string(s.token) } -// scanner_scan advances the scanner -scanner_scan :: proc(s: ^Scanner) -> bool { +// scanner_scan is an alias of scan +scanner_scan :: scan + +// scan advances the Scanner +scan :: proc(s: ^Scanner) -> bool { set_err :: proc(s: ^Scanner, err: Scanner_Error) { switch s._err { case nil, .EOF: @@ -229,6 +235,7 @@ scanner_scan :: proc(s: ^Scanner) -> bool { } } +// scan_bytes is a splitting procedure that returns each byte as a token scan_bytes :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) { if at_eof && len(data) == 0 { return @@ -236,6 +243,10 @@ scan_bytes :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, return 1, data[0:1], nil, false } +// scan_runes is a splitting procedure that returns each UTF-8 encoded rune as a token. 
+// The list of runes returned is equivalent to that of iterating over a string in a 'for in' loop, meaning any +// erroneous UTF-8 encodings will be returned as U+FFFD. Unfortunately this means it is impossible for the "client" +// to know whether a U+FFFD is an expected replacement rune or an encoding of an error. scan_runes :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) { if at_eof && len(data) == 0 { return @@ -264,7 +275,8 @@ scan_runes :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, token = ERROR_RUNE return } - +// scan_words is a splitting procedure that returns each Unicode-space-separated word of text, excluding the surrounding spaces. +// It will never return an empty string. scan_words :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) { is_space :: proc "contextless" (r: rune) -> bool { switch r { @@ -312,6 +324,8 @@ scan_words :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, return } +// scan_lines is a splitting procedure that returns each line of text, stripped of any trailing newline and an optional preceding carriage return (\r?\n). +// A line is allowed to be empty. 
scan_lines :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) { trim_carriage_return :: proc "contextless" (data: []byte) -> []byte { if len(data) > 0 && data[len(data)-1] == '\r' { diff --git a/core/bufio/writer.odin b/core/bufio/writer.odin index 5edd3dd6b..9c73baf87 100644 --- a/core/bufio/writer.odin +++ b/core/bufio/writer.odin @@ -19,6 +19,7 @@ Writer :: struct { } +// Initializes a Writer with an `allocator` writer_init :: proc(b: ^Writer, wr: io.Writer, size: int = DEFAULT_BUF_SIZE, allocator := context.allocator) { size := size size = max(size, MIN_READ_BUFFER_SIZE) @@ -27,6 +28,7 @@ writer_init :: proc(b: ^Writer, wr: io.Writer, size: int = DEFAULT_BUF_SIZE, all b.buf = make([]byte, size, allocator) } +// Initializes a Writer with a user provided buffer `buf` writer_init_with_buf :: proc(b: ^Writer, wr: io.Writer, buf: []byte) { writer_reset(b, wr) b.buf_allocator = {}