diff --git a/core/strings/ascii_set.odin b/core/strings/ascii_set.odin index 582049eee..9b59666f3 100644 --- a/core/strings/ascii_set.odin +++ b/core/strings/ascii_set.odin @@ -5,6 +5,7 @@ import "core:unicode/utf8" Ascii_Set :: distinct [8]u32 +// create an ascii set of all unique characters in the string ascii_set_make :: proc(chars: string) -> (as: Ascii_Set, ok: bool) #no_bounds_check { for i in 0.. (as: Ascii_Set, ok: bool) #no_bounds_ch return } +// returns true when the `c` byte is contained in the `as` ascii set ascii_set_contains :: proc(as: Ascii_Set, c: byte) -> bool #no_bounds_check { return as[c>>5] & (1<<(c&31)) != 0 } \ No newline at end of file diff --git a/core/strings/builder.odin b/core/strings/builder.odin index 6952ba088..d6065cf70 100644 --- a/core/strings/builder.odin +++ b/core/strings/builder.odin @@ -7,40 +7,56 @@ import "core:io" Builder_Flush_Proc :: #type proc(b: ^Builder) -> (do_reset: bool) +/* + dynamic byte buffer / string builder with helper procedures + the dynamic array is wrapped inside the struct to be more opaque + you can use `fmt.sbprint*` procedures with a `^strings.Builder` directly +*/ Builder :: struct { buf: [dynamic]byte, } +// return a builder, default length 0 / cap 16 are done through make make_builder_none :: proc(allocator := context.allocator) -> Builder { return Builder{buf=make([dynamic]byte, allocator)} } +// return a builder, with a set length `len` and cap 16 byte buffer make_builder_len :: proc(len: int, allocator := context.allocator) -> Builder { return Builder{buf=make([dynamic]byte, len, allocator)} } +// return a builder, with a set length `len` byte buffer and a custom `cap` make_builder_len_cap :: proc(len, cap: int, allocator := context.allocator) -> Builder { return Builder{buf=make([dynamic]byte, len, cap, allocator)} } +// overload simple `make_builder_*` with or without len / cap parameters make_builder :: proc{ make_builder_none, make_builder_len, make_builder_len_cap, } +// initialize a 
builder, default length 0 / cap 16 are done through make +// replaces the existing `buf` init_builder_none :: proc(b: ^Builder, allocator := context.allocator) { b.buf = make([dynamic]byte, allocator) } +// initialize a builder, with a set length `len` and cap 16 byte buffer +// replaces the existing `buf` init_builder_len :: proc(b: ^Builder, len: int, allocator := context.allocator) { b.buf = make([dynamic]byte, len, allocator) } +// initialize a builder, with a set length `len` byte buffer and a custom `cap` +// replaces the existing `buf` init_builder_len_cap :: proc(b: ^Builder, len, cap: int, allocator := context.allocator) { b.buf = make([dynamic]byte, len, cap, allocator) } +// overload simple `init_builder_*` with or without len / cap parameters init_builder :: proc{ init_builder_none, init_builder_len, @@ -76,30 +92,42 @@ _builder_stream_vtable := &io.Stream_VTable{ }, } +// return an `io.Stream` from a builder to_stream :: proc(b: ^Builder) -> io.Stream { return io.Stream{stream_vtable=_builder_stream_vtable, stream_data=b} } + +// return an `io.Writer` from a builder to_writer :: proc(b: ^Builder) -> io.Writer { return io.to_writer(to_stream(b)) } - - - +// delete and clear the builder byte buffer content destroy_builder :: proc(b: ^Builder) { delete(b.buf) clear(&b.buf) } +// reserve the builder byte buffer to a specific cap, when it's higher than before grow_builder :: proc(b: ^Builder, cap: int) { reserve(&b.buf, cap) } +// clear the builder byte buffer content reset_builder :: proc(b: ^Builder) { clear(&b.buf) } - +/* + create an empty builder with the same slice length as its cap + uses the `mem.nil_allocator` to avoid allocation and keep a fixed length + used in `fmt.bprint*` + + bytes: [8]byte // <-- gets filled + builder := strings.builder_from_slice(bytes[:]) + strings.write_byte(&builder, 'a') -> "a" + strings.write_byte(&builder, 'b') -> "ab" +*/ builder_from_slice :: proc(backing: []byte) -> Builder { s := transmute(mem.Raw_Slice)backing d := 
mem.Raw_Dynamic_Array{ @@ -112,20 +140,36 @@ builder_from_slice :: proc(backing: []byte) -> Builder { buf = transmute([dynamic]byte)d, } } + +// cast the builder byte buffer to a string and return it to_string :: proc(b: Builder) -> string { return string(b.buf[:]) } +// return the length of the builder byte buffer builder_len :: proc(b: Builder) -> int { return len(b.buf) } + +// return the cap of the builder byte buffer builder_cap :: proc(b: Builder) -> int { return cap(b.buf) } + +// returns the space left in the builder byte buffer to use up builder_space :: proc(b: Builder) -> int { - return max(cap(b.buf), len(b.buf), 0) + return cap(b.buf) - len(b.buf) } +/* + appends a byte to the builder, returns the append diff + + builder := strings.make_builder() + strings.write_byte(&builder, 'a') // 1 + strings.write_byte(&builder, 'b') // 1 + strings.write_byte(&builder, 'c') // 1 + fmt.println(strings.to_string(builder)) // -> abc +*/ write_byte :: proc(b: ^Builder, x: byte) -> (n: int) { n0 := len(b.buf) append(&b.buf, x) @@ -133,6 +177,14 @@ write_byte :: proc(b: ^Builder, x: byte) -> (n: int) { return n1-n0 } +/* + appends a slice of bytes to the builder, returns the append diff + + builder := strings.make_builder() + bytes := [?]byte { 'a', 'b', 'c' } + strings.write_bytes(&builder, bytes[:]) // 3 + fmt.println(strings.to_string(builder)) // -> abc +*/ write_bytes :: proc(b: ^Builder, x: []byte) -> (n: int) { n0 := len(b.buf) append(&b.buf, ..x) @@ -140,11 +192,28 @@ write_bytes :: proc(b: ^Builder, x: []byte) -> (n: int) { return n1-n0 } +/* + appends a single rune into the builder, returns written rune size and an `io.Error` + + builder := strings.make_builder() + strings.write_rune_builder(&builder, 'ä') // 2 None + strings.write_rune_builder(&builder, 'b') // 1 None + strings.write_rune_builder(&builder, 'c') // 1 None + fmt.println(strings.to_string(builder)) // -> äbc +*/ write_rune_builder :: proc(b: ^Builder, r: rune) -> (int, io.Error) { return 
io.write_rune(to_writer(b), r) } +/* + appends a quoted rune into the builder, returns written size + builder := strings.make_builder() + strings.write_string(&builder, "abc") // 3 + strings.write_quoted_rune_builder(&builder, 'ä') // 4 + strings.write_string(&builder, "abc") // 3 + fmt.println(strings.to_string(builder)) // -> abc'ä'abc +*/ write_quoted_rune_builder :: proc(b: ^Builder, r: rune) -> (n: int) { return write_quoted_rune(to_writer(b), r) } @@ -155,7 +224,7 @@ _write_byte :: proc(w: io.Writer, c: byte) -> int { return 1 if err == nil else 0 } - +// writer append a quoted rune into the byte buffer, return the written size write_quoted_rune :: proc(w: io.Writer, r: rune) -> (n: int) { quote := byte('\'') n += _write_byte(w, quote) @@ -173,50 +242,75 @@ write_quoted_rune :: proc(w: io.Writer, r: rune) -> (n: int) { return } - +// overload for `write_string_*` variants write_string :: proc{ write_string_builder, write_string_writer, } +/* + appends a string to the builder, return the written byte size + + builder := strings.make_builder() + strings.write_string(&builder, "a") // 1 + strings.write_string(&builder, "bc") // 2 + strings.write_string(&builder, "xyz") // 3 + fmt.println(strings.to_string(builder)) // -> abcxyz +*/ write_string_builder :: proc(b: ^Builder, s: string) -> (n: int) { return write_string_writer(to_writer(b), s) } +// appends a string to the writer write_string_writer :: proc(w: io.Writer, s: string) -> (n: int) { n, _ = io.write(w, transmute([]byte)s) return } - - - +// pops and returns the last byte in the builder +// returns 0 when the builder is empty pop_byte :: proc(b: ^Builder) -> (r: byte) { if len(b.buf) == 0 { return 0 } + r = b.buf[len(b.buf)-1] d := cast(^mem.Raw_Dynamic_Array)&b.buf d.len = max(d.len-1, 0) return } +// pops the last rune in the builder and returns the popped rune and its rune width +// returns 0, 0 when the builder is empty pop_rune :: proc(b: ^Builder) -> (r: rune, width: int) { + if len(b.buf) == 0 { + 
return 0, 0 + } + r, width = utf8.decode_last_rune(b.buf[:]) d := cast(^mem.Raw_Dynamic_Array)&b.buf d.len = max(d.len-width, 0) return } - @(private) DIGITS_LOWER := "0123456789abcdefx" +// overload for `write_quoted_string_*` variants write_quoted_string :: proc{ write_quoted_string_builder, write_quoted_string_writer, } +/* + append a quoted string into the builder, return the written byte size + + builder := strings.make_builder() + strings.write_quoted_string(&builder, "a") // 3 + strings.write_quoted_string(&builder, "bc", '\'') // 4 + strings.write_quoted_string(&builder, "xyz") // 5 + fmt.println(strings.to_string(builder)) // -> "a"'bc'"xyz" +*/ write_quoted_string_builder :: proc(b: ^Builder, str: string, quote: byte = '"') -> (n: int) { n, _ = io.write_quoted_string(to_writer(b), str, quote) return @@ -228,11 +322,13 @@ write_quoted_string_writer :: proc(w: io.Writer, str: string, quote: byte = '"') return } +// overload for `write_encoded_rune_*` write_encoded_rune :: proc{ write_encoded_rune_builder, write_encoded_rune_writer, } +// appends a rune to the builder, optional `write_quote` boolean tag, returns the written rune size write_encoded_rune_builder :: proc(b: ^Builder, r: rune, write_quote := true) -> (n: int) { n, _ = io.write_encoded_rune(to_writer(b), r, write_quote) return @@ -244,12 +340,15 @@ write_encoded_rune_writer :: proc(w: io.Writer, r: rune, write_quote := true) -> return } - +// overload for `write_escaped_rune_*` write_escaped_rune :: proc{ write_escaped_rune_builder, write_escaped_rune_writer, } +// appends a rune to the builder, fully written out in case of escaped runes e.g. '\a' will be written as such +// when `r` and `quote` match and `quote` is `\\` - they will be written as two slashes +// `html_safe` flag in case the runes '<', '>', '&' should be encoded as digits e.g. 
`\u0026` write_escaped_rune_builder :: proc(b: ^Builder, r: rune, quote: byte, html_safe := false) -> (n: int) { n, _ = io.write_escaped_rune(to_writer(b), r, quote, html_safe) return @@ -261,21 +360,26 @@ write_escaped_rune_writer :: proc(w: io.Writer, r: rune, quote: byte, html_safe return } - +// writes a u64 value `i` in `base` = 10 into the builder, returns the written amount of characters write_u64 :: proc(b: ^Builder, i: u64, base: int = 10) -> (n: int) { buf: [32]byte s := strconv.append_bits(buf[:], i, base, false, 64, strconv.digits, nil) return write_string(b, s) } + +// writes an i64 value `i` in `base` = 10 into the builder, returns the written amount of characters write_i64 :: proc(b: ^Builder, i: i64, base: int = 10) -> (n: int) { buf: [32]byte s := strconv.append_bits(buf[:], u64(i), base, true, 64, strconv.digits, nil) return write_string(b, s) } +// writes a uint value `i` in `base` = 10 into the builder, returns the written amount of characters write_uint :: proc(b: ^Builder, i: uint, base: int = 10) -> (n: int) { return write_u64(b, u64(i), base) } + +// writes an int value `i` in `base` = 10 into the builder, returns the written amount of characters write_int :: proc(b: ^Builder, i: int, base: int = 10) -> (n: int) { return write_i64(b, i64(i), base) } diff --git a/core/strings/conversion.odin b/core/strings/conversion.odin index b0d42b2eb..5e7110281 100644 --- a/core/strings/conversion.odin +++ b/core/strings/conversion.odin @@ -58,6 +58,13 @@ to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) -> return to_string(b) } +/* + returns the input string `s` with all runes set to lower case + always allocates using the `allocator` + + strings.to_lower("test") -> test + strings.to_lower("Test") -> test +*/ to_lower :: proc(s: string, allocator := context.allocator) -> string { b: Builder init_builder(&b, 0, len(s), allocator) @@ -66,6 +73,14 @@ to_lower :: proc(s: string, allocator := context.allocator) -> string { } return 
to_string(b) } + +/* + returns the input string `s` with all runes set to upper case + always allocates using the `allocator` + + strings.to_upper("test") -> TEST + strings.to_upper("Test") -> TEST +*/ to_upper :: proc(s: string, allocator := context.allocator) -> string { b: Builder init_builder(&b, 0, len(s), allocator) @@ -75,13 +90,13 @@ to_upper :: proc(s: string, allocator := context.allocator) -> string { return to_string(b) } - - - +// returns true when the `c` rune is a space, '-' or '_' +// useful when treating strings like words in a text editor or html paths is_delimiter :: proc(c: rune) -> bool { return c == '-' || c == '_' || is_space(c) } +// returns true when the `r` rune is a non alpha or `unicode.is_space` rune is_separator :: proc(r: rune) -> bool { if r <= 0x7f { switch r { @@ -101,7 +116,10 @@ is_separator :: proc(r: rune) -> bool { return unicode.is_space(r) } - +/* + iterator that loops through the string and calls the callback with the `prev`, `curr` and `next` rune + on empty string `s` the callback gets called once with empty runes +*/ string_case_iterator :: proc(w: io.Writer, s: string, callback: proc(w: io.Writer, prev, curr, next: rune)) { prev, curr: rune for next in s { @@ -122,8 +140,9 @@ string_case_iterator :: proc(w: io.Writer, s: string, callback: proc(w: io.Write } } - to_lower_camel_case :: to_camel_case + +// converts the `s` string to "lowerCamelCase" to_camel_case :: proc(s: string, allocator := context.allocator) -> string { s := s s = trim_space(s) @@ -147,6 +166,8 @@ to_camel_case :: proc(s: string, allocator := context.allocator) -> string { } to_upper_camel_case :: to_pascal_case + +// converts the `s` string to "PascalCase" to_pascal_case :: proc(s: string, allocator := context.allocator) -> string { s := s s = trim_space(s) @@ -169,6 +190,15 @@ to_pascal_case :: proc(s: string, allocator := context.allocator) -> string { return to_string(b) } +/* + returns the `s` string to words separated by the given `delimiter` 
rune + all runes will be upper or lowercased based on the `all_upper_case` bool + + strings.to_delimiter_case("Hello World", '_', false) -> hello_world + strings.to_delimiter_case("Hello World", ' ', true) -> HELLO WORLD + strings.to_delimiter_case("Hello World", ' ', true) -> HELLO WORLD + strings.to_delimiter_case("aBC", '_', false) -> a_b_c +*/ to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allocator := context.allocator) -> string { s := s s = trim_space(s) @@ -208,24 +238,34 @@ to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allo return to_string(b) } - +/* + converts the `s` string to "snake_case" with all runes lowercased + + strings.to_snake_case("HelloWorld") -> hello_world + strings.to_snake_case("Hello World") -> hello_world +*/ to_snake_case :: proc(s: string, allocator := context.allocator) -> string { return to_delimiter_case(s, '_', false, allocator) } to_screaming_snake_case :: to_upper_snake_case + +// converts the `s` string to "SNAKE_CASE" with all runes uppercased to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> string { return to_delimiter_case(s, '_', true, allocator) } +// converts the `s` string to "kebab-case" with all runes lowercased to_kebab_case :: proc(s: string, allocator := context.allocator) -> string { return to_delimiter_case(s, '-', false, allocator) } -to_upper_case :: proc(s: string, allocator := context.allocator) -> string { +// converts the `s` string to "KEBAB-CASE" with all runes uppercased +to_upper_kebab_case :: proc(s: string, allocator := context.allocator) -> string { return to_delimiter_case(s, '-', true, allocator) } +// converts the `s` string to "Ada_case" to_ada_case :: proc(s: string, allocator := context.allocator) -> string { delimiter :: '_' diff --git a/core/strings/intern.odin b/core/strings/intern.odin index ff26d7dbb..27c3db084 100644 --- a/core/strings/intern.odin +++ b/core/strings/intern.odin @@ -2,21 +2,26 @@ package 
strings import "core:mem" +// custom string entry struct Intern_Entry :: struct { len: int, str: [1]byte, // string is allocated inline with the entry to keep allocations simple } +// "intern" is a more memory efficient string map +// `allocator` is used to allocate the actual `Intern_Entry` strings Intern :: struct { allocator: mem.Allocator, entries: map[string]^Intern_Entry, } +// initialize the entries map and set the allocator for the string entries intern_init :: proc(m: ^Intern, allocator := context.allocator, map_allocator := context.allocator) { m.allocator = allocator m.entries = make(map[string]^Intern_Entry, 16, map_allocator) } +// free the map and all its content allocated using the `.allocator` intern_destroy :: proc(m: ^Intern) { for _, value in m.entries { free(value, m.allocator) @@ -24,15 +29,22 @@ intern_destroy :: proc(m: ^Intern) { delete(m.entries) } +// returns the `text` string from the intern map - gets set if it didn't exist yet +// the returned string lives as long as the map entry lives intern_get :: proc(m: ^Intern, text: string) -> string { entry := _intern_get_entry(m, text) #no_bounds_check return string(entry.str[:entry.len]) } + +// returns the `text` cstring from the intern map - gets set if it didn't exist yet +// the returned cstring lives as long as the map entry lives intern_get_cstring :: proc(m: ^Intern, text: string) -> cstring { entry := _intern_get_entry(m, text) return cstring(&entry.str[0]) } +// looks up whether the `text` string exists in the map, returns the entry +// sets & allocates the entry if it wasn't set yet _intern_get_entry :: proc(m: ^Intern, text: string) -> ^Intern_Entry #no_bounds_check { if prev, ok := m.entries[text]; ok { return prev diff --git a/core/strings/reader.odin b/core/strings/reader.odin index ba266c0b5..9b2e10b68 100644 --- a/core/strings/reader.odin +++ b/core/strings/reader.odin @@ -3,46 +3,60 @@ package strings import "core:io" import "core:unicode/utf8" +/* + io stream data for a string 
reader that can read based on bytes or runes + implements the vtable when using the io.Reader variants + "read" calls advance the current reading offset `i` +*/ Reader :: struct { s: string, // read-only buffer i: i64, // current reading index prev_rune: int, // previous reading index of rune or < 0 } +// init the reader to the string `s` reader_init :: proc(r: ^Reader, s: string) { r.s = s r.i = 0 r.prev_rune = -1 } +// returns a stream from the reader data reader_to_stream :: proc(r: ^Reader) -> (s: io.Stream) { s.stream_data = r s.stream_vtable = _reader_vtable return } +// init a reader to the string `s` and return an io.Reader to_reader :: proc(r: ^Reader, s: string) -> io.Reader { reader_init(r, s) rr, _ := io.to_reader(reader_to_stream(r)) return rr } + +// init a reader to the string `s` and return an io.Reader_At to_reader_at :: proc(r: ^Reader, s: string) -> io.Reader_At { reader_init(r, s) rr, _ := io.to_reader_at(reader_to_stream(r)) return rr } + +// init a reader to the string `s` and return an io.Byte_Reader to_byte_reader :: proc(r: ^Reader, s: string) -> io.Byte_Reader { reader_init(r, s) rr, _ := io.to_byte_reader(reader_to_stream(r)) return rr } + +// init a reader to the string `s` and return an io.Rune_Reader to_rune_reader :: proc(r: ^Reader, s: string) -> io.Rune_Reader { reader_init(r, s) rr, _ := io.to_rune_reader(reader_to_stream(r)) return rr } - +// remaining length of the reader reader_length :: proc(r: ^Reader) -> int { if r.i >= i64(len(r.s)) { return 0 @@ -50,10 +64,13 @@ reader_length :: proc(r: ^Reader) -> int { return int(i64(len(r.s)) - r.i) } +// returns the string length stored by the reader reader_size :: proc(r: ^Reader) -> i64 { return i64(len(r.s)) } +// reads len(p) bytes into the slice from the string in the reader +// returns `n` amount of read bytes and an io.Error reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) { if r.i >= i64(len(r.s)) { return 0, .EOF @@ -63,6 +80,9 @@ reader_read :: proc(r: 
^Reader, p: []byte) -> (n: int, err: io.Error) { r.i += i64(n) return } + +// reads len(p) bytes into the slice from the string in the reader at an offset +// returns `n` amount of read bytes and an io.Error reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Error) { if off < 0 { return 0, .Invalid_Offset @@ -76,6 +96,8 @@ reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Erro } return } + +// reads and returns a single byte - error when out of bounds reader_read_byte :: proc(r: ^Reader) -> (byte, io.Error) { r.prev_rune = -1 if r.i >= i64(len(r.s)) { @@ -85,6 +107,8 @@ reader_read_byte :: proc(r: ^Reader) -> (byte, io.Error) { r.i += 1 return b, nil } + +// decreases the reader offset - error when below 0 reader_unread_byte :: proc(r: ^Reader) -> io.Error { if r.i <= 0 { return .Invalid_Unread @@ -93,6 +117,8 @@ reader_unread_byte :: proc(r: ^Reader) -> io.Error { r.i -= 1 return nil } + +// reads and returns a single rune and the rune size - error when out of bounds reader_read_rune :: proc(r: ^Reader) -> (ch: rune, size: int, err: io.Error) { if r.i >= i64(len(r.s)) { r.prev_rune = -1 @@ -107,6 +133,9 @@ reader_read_rune :: proc(r: ^Reader) -> (ch: rune, size: int, err: io.Error) { r.i += i64(size) return } + +// decreases the reader offset by the last rune +// can only be used once and after a valid read_rune call reader_unread_rune :: proc(r: ^Reader) -> io.Error { if r.i <= 0 { return .Invalid_Unread @@ -118,6 +147,8 @@ reader_unread_rune :: proc(r: ^Reader) -> io.Error { r.prev_rune = -1 return nil } + +// seeks the reader offset to a wanted offset reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.Error) { r.prev_rune = -1 abs: i64 @@ -138,6 +169,8 @@ reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.E r.i = abs return abs, nil } + +// writes the string content left to read into the io.Writer `w` reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: 
i64, err: io.Error) { r.prev_rune = -1 if r.i >= i64(len(r.s)) { @@ -157,7 +190,6 @@ reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) { return } - @(private) _reader_vtable := &io.Stream_VTable{ impl_size = proc(s: io.Stream) -> i64 {