documentation for the rest of the strings library

This commit is contained in:
Michael Kutowski
2022-04-03 19:37:54 +02:00
committed by GitHub
parent ab91fa6ad5
commit b5aa50aaa4
5 changed files with 212 additions and 22 deletions

View File

@@ -5,6 +5,7 @@ import "core:unicode/utf8"
Ascii_Set :: distinct [8]u32
// create an ascii set of all unique characters in the string
ascii_set_make :: proc(chars: string) -> (as: Ascii_Set, ok: bool) #no_bounds_check {
for i in 0..<len(chars) {
c := chars[i]
@@ -17,6 +18,7 @@ ascii_set_make :: proc(chars: string) -> (as: Ascii_Set, ok: bool) #no_bounds_ch
return
}
// reports whether the byte `c` has its bit set in the ascii set `as`
// the upper 3 bits of `c` select the u32 word, the lower 5 bits select the bit inside it
ascii_set_contains :: proc(as: Ascii_Set, c: byte) -> bool #no_bounds_check {
	word := as[c>>5]
	mask := u32(1) << (c & 31)
	return word & mask != 0
}

View File

@@ -7,40 +7,56 @@ import "core:io"
// procedure type for a builder flush callback, its result is named `do_reset`
// NOTE(review): no user of this type is visible in this hunk - confirm where it gets called
Builder_Flush_Proc :: #type proc(b: ^Builder) -> (do_reset: bool)
/*
dynamic byte buffer / string builder with helper procedures
the dynamic array is wrapped inside the struct to be more opaque
you can use `fmt.sbprint*` procedures with a `^strings.Builder` directly
*/
Builder :: struct {
buf: [dynamic]byte, // backing bytes, `to_string` returns a string view of exactly these bytes
}
// creates a builder whose byte buffer is allocated with `allocator`
// length and capacity are the defaults `make` picks when no sizes are passed
make_builder_none :: proc(allocator := context.allocator) -> Builder {
	result: Builder
	result.buf = make([dynamic]byte, allocator)
	return result
}
// creates a builder whose byte buffer starts with length `len`
// the capacity is whatever `make` chooses for a length-only call
make_builder_len :: proc(len: int, allocator := context.allocator) -> Builder {
	result: Builder
	result.buf = make([dynamic]byte, len, allocator)
	return result
}
// creates a builder whose byte buffer starts with length `len` and capacity `cap`
make_builder_len_cap :: proc(len, cap: int, allocator := context.allocator) -> Builder {
	result: Builder
	result.buf = make([dynamic]byte, len, cap, allocator)
	return result
}
// procedure group over the `make_builder_*` variants
// the variant is chosen by the arguments passed: no sizes, `len`, or `len` and `cap`
make_builder :: proc{
make_builder_none,
make_builder_len,
make_builder_len_cap,
}
// initializes `b` in place with a byte buffer allocated from `allocator`
// length and capacity are the defaults `make` picks when no sizes are passed
// the previous `buf` is overwritten, it is not freed here
init_builder_none :: proc(b: ^Builder, allocator := context.allocator) {
	fresh := make([dynamic]byte, allocator)
	b.buf = fresh
}
// initializes `b` in place with a byte buffer of length `len`
// the capacity is whatever `make` chooses for a length-only call
// the previous `buf` is overwritten, it is not freed here
init_builder_len :: proc(b: ^Builder, len: int, allocator := context.allocator) {
	fresh := make([dynamic]byte, len, allocator)
	b.buf = fresh
}
// initializes `b` in place with a byte buffer of length `len` and capacity `cap`
// the previous `buf` is overwritten, it is not freed here
init_builder_len_cap :: proc(b: ^Builder, len, cap: int, allocator := context.allocator) {
	fresh := make([dynamic]byte, len, cap, allocator)
	b.buf = fresh
}
// overload simple `init_builder_*` with or without len / cap parameters
init_builder :: proc{
init_builder_none,
init_builder_len,
@@ -76,30 +92,42 @@ _builder_stream_vtable := &io.Stream_VTable{
},
}
// wraps the builder in an `io.Stream` that uses the builder stream vtable
// the stream only stores the pointer `b`, so the builder has to outlive it
to_stream :: proc(b: ^Builder) -> io.Stream {
	s: io.Stream
	s.stream_vtable = _builder_stream_vtable
	s.stream_data = b
	return s
}
// wraps the builder in an `io.Writer`, built on top of `to_stream`
to_writer :: proc(b: ^Builder) -> io.Writer {
	stream := to_stream(b)
	return io.to_writer(stream)
}
/*
	frees the builder byte buffer and leaves the builder empty

	after `delete` the buffer memory is gone, so data pointer, length and capacity
	are all reset - only resetting the length would keep a dangling pointer with a
	non zero capacity around, and a later write would land in freed memory
	the allocator stored in the buffer is kept, the builder can be written to again
*/
destroy_builder :: proc(b: ^Builder) {
	delete(b.buf)
	raw := cast(^mem.Raw_Dynamic_Array)&b.buf
	raw.data = nil
	raw.len = 0
	raw.cap = 0
}
// reserve the builder byte buffer to a specific cap, when it's higher than before
// forwards directly to the builtin `reserve`
grow_builder :: proc(b: ^Builder, cap: int) {
reserve(&b.buf, cap)
}
// clear the builder byte buffer content
// forwards directly to the builtin `clear`, nothing is freed here
reset_builder :: proc(b: ^Builder) {
clear(&b.buf)
}
/*
create an empty builder with the same slice length as its cap
uses the `mem.nil_allocator` to avoid allocation and keep a fixed length
used in `fmt.bprint*`
bytes: [8]byte // <-- gets filled
builder := strings.builder_from_slice(bytes[:])
strings.write_byte(&builder, 'a') -> "a"
strings.write_byte(&builder, 'b') -> "ab"
*/
builder_from_slice :: proc(backing: []byte) -> Builder {
s := transmute(mem.Raw_Slice)backing
d := mem.Raw_Dynamic_Array{
@@ -112,20 +140,36 @@ builder_from_slice :: proc(backing: []byte) -> Builder {
buf = transmute([dynamic]byte)d,
}
}
// returns the builder content as a string
// the string is a view of the builder bytes, no copy is made
to_string :: proc(b: Builder) -> string {
	bytes := b.buf[:]
	return string(bytes)
}
// return the length of the builder byte buffer, i.e. the amount of bytes written so far
builder_len :: proc(b: Builder) -> int {
return len(b.buf)
}
// return the cap of the builder byte buffer, i.e. the amount of bytes it can hold without growing
builder_cap :: proc(b: Builder) -> int {
return cap(b.buf)
}
// returns the space left in the builder byte buffer to use up
// this is the capacity minus the amount of bytes already written
builder_space :: proc(b: Builder) -> int {
	return cap(b.buf) - len(b.buf)
}
/*
appends a byte to the builder, returns the append diff
builder := strings.make_builder()
strings.write_byte(&builder, 'a') // 1
strings.write_byte(&builder, 'b') // 1
strings.write_byte(&builder, 'c') // 1
fmt.println(strings.to_string(builder)) // -> abc
*/
write_byte :: proc(b: ^Builder, x: byte) -> (n: int) {
n0 := len(b.buf)
append(&b.buf, x)
@@ -133,6 +177,14 @@ write_byte :: proc(b: ^Builder, x: byte) -> (n: int) {
return n1-n0
}
/*
appends a slice of bytes to the builder, returns the append diff
builder := strings.make_builder()
bytes := [?]byte { 'a', 'b', 'c' }
strings.write_bytes(&builder, bytes[:]) // 3
fmt.println(strings.to_string(builder)) // -> abc
*/
write_bytes :: proc(b: ^Builder, x: []byte) -> (n: int) {
n0 := len(b.buf)
append(&b.buf, ..x)
@@ -140,11 +192,28 @@ write_bytes :: proc(b: ^Builder, x: []byte) -> (n: int) {
return n1-n0
}
/*
	appends the rune `r` to the builder through its `io.Writer`
	returns the result of `io.write_rune`: the written size and an `io.Error`

	builder := strings.make_builder()
	strings.write_rune_builder(&builder, 'ä') // 2 None
	strings.write_rune_builder(&builder, 'b') // 1 None
	strings.write_rune_builder(&builder, 'c') // 1 None
	fmt.println(strings.to_string(builder)) // -> äbc
*/
write_rune_builder :: proc(b: ^Builder, r: rune) -> (int, io.Error) {
	w := to_writer(b)
	return io.write_rune(w, r)
}
/*
	appends the rune `r` wrapped in single quotes to the builder
	returns the written size, as reported by `write_quoted_rune`

	builder := strings.make_builder()
	strings.write_string(&builder, "abc") // 3
	strings.write_quoted_rune_builder(&builder, 'ä') // 4
	strings.write_string(&builder, "abc") // 3
	fmt.println(strings.to_string(builder)) // -> abc'ä'abc
*/
write_quoted_rune_builder :: proc(b: ^Builder, r: rune) -> (n: int) {
	w := to_writer(b)
	n = write_quoted_rune(w, r)
	return
}
@@ -155,7 +224,7 @@ _write_byte :: proc(w: io.Writer, c: byte) -> int {
return 1 if err == nil else 0
}
// writer append a quoted rune into the byte buffer, return the written size
write_quoted_rune :: proc(w: io.Writer, r: rune) -> (n: int) {
quote := byte('\'')
n += _write_byte(w, quote)
@@ -173,50 +242,75 @@ write_quoted_rune :: proc(w: io.Writer, r: rune) -> (n: int) {
return
}
// procedure group over the `write_string_*` variants
// takes either a `^Builder` or an `io.Writer` as the first argument
write_string :: proc{
write_string_builder,
write_string_writer,
}
/*
	appends the string `s` to the builder, returns the written byte size
	goes through the builder `io.Writer` and `write_string_writer`

	builder := strings.make_builder()
	strings.write_string(&builder, "a") // 1
	strings.write_string(&builder, "bc") // 2
	strings.write_string(&builder, "xyz") // 3
	fmt.println(strings.to_string(builder)) // -> abcxyz
*/
write_string_builder :: proc(b: ^Builder, s: string) -> (n: int) {
	w := to_writer(b)
	n = write_string_writer(w, s)
	return
}
// writes the bytes of the string `s` to the writer `w`
// returns the byte count reported by `io.write`, the `io.Error` is discarded
write_string_writer :: proc(w: io.Writer, s: string) -> (n: int) {
	bytes := transmute([]byte)s
	written, _ := io.write(w, bytes)
	return written
}
// removes the last byte from the builder and returns it
// returns 0 when the builder is empty
pop_byte :: proc(b: ^Builder) -> (r: byte) {
	count := len(b.buf)
	if count == 0 {
		return 0
	}

	r = b.buf[count-1]

	// shrink the length through the raw dynamic array, capacity and data stay untouched
	raw := cast(^mem.Raw_Dynamic_Array)&b.buf
	raw.len = max(raw.len-1, 0)
	return
}
// removes the last rune from the builder, returns that rune and its width in bytes
// returns 0, 0 when the builder is empty
pop_rune :: proc(b: ^Builder) -> (r: rune, width: int) {
	if len(b.buf) == 0 {
		return 0, 0
	}

	content := b.buf[:]
	r, width = utf8.decode_last_rune(content)

	// shrink the length by the decoded width, never below zero
	raw := cast(^mem.Raw_Dynamic_Array)&b.buf
	raw.len = max(raw.len-width, 0)
	return
}
// lowercase hexadecimal digit characters followed by a trailing 'x'
// NOTE(review): no use of this table is visible in this hunk - confirm before changing it
@(private)
DIGITS_LOWER := "0123456789abcdefx"
// procedure group over the `write_quoted_string_*` variants
// takes either a `^Builder` or an `io.Writer` as the first argument
write_quoted_string :: proc{
write_quoted_string_builder,
write_quoted_string_writer,
}
/*
append a quoted string into the builder, return the written byte size
builder := strings.make_builder()
strings.write_quoted_string(&builder, "a") // 3
strings.write_quoted_string(&builder, "bc", '\'') // 4
strings.write_quoted_string(&builder, "xyz") // 5
fmt.println(strings.to_string(builder)) // -> "a"'bc'"xyz"
*/
write_quoted_string_builder :: proc(b: ^Builder, str: string, quote: byte = '"') -> (n: int) {
n, _ = io.write_quoted_string(to_writer(b), str, quote)
return
@@ -228,11 +322,13 @@ write_quoted_string_writer :: proc(w: io.Writer, str: string, quote: byte = '"')
return
}
// procedure group over the `write_encoded_rune_*` variants
// takes either a `^Builder` or an `io.Writer` as the first argument
write_encoded_rune :: proc{
write_encoded_rune_builder,
write_encoded_rune_writer,
}
// appends a rune to the builder, optional `write_quote` boolean tag, returns the written rune size
write_encoded_rune_builder :: proc(b: ^Builder, r: rune, write_quote := true) -> (n: int) {
n, _ = io.write_encoded_rune(to_writer(b), r, write_quote)
return
@@ -244,12 +340,15 @@ write_encoded_rune_writer :: proc(w: io.Writer, r: rune, write_quote := true) ->
return
}
// procedure group over the `write_escaped_rune_*` variants
// takes either a `^Builder` or an `io.Writer` as the first argument
write_escaped_rune :: proc{
write_escaped_rune_builder,
write_escaped_rune_writer,
}
// appends a rune to the builder, fully written out in case of escaped runes e.g. '\a' will be written as such
// when `r` and `quote` match and `quote` is `\\` - they will be written as two slashes
// `html_safe` flag in case the runes '<', '>', '&' should be encoded as digits e.g. `\u0026`
write_escaped_rune_builder :: proc(b: ^Builder, r: rune, quote: byte, html_safe := false) -> (n: int) {
n, _ = io.write_escaped_rune(to_writer(b), r, quote, html_safe)
return
@@ -261,21 +360,26 @@ write_escaped_rune_writer :: proc(w: io.Writer, r: rune, quote: byte, html_safe
return
}
/*
	writes the u64 value `i` in the given `base` (10 by default) into the builder
	returns the written amount of characters
*/
write_u64 :: proc(b: ^Builder, i: u64, base: int = 10) -> (n: int) {
	// worst case is base 2: one digit per bit of a 64 bit value
	// a 32 byte scratch buffer is too small for base 2 (64 digits) and base 3 (41 digits)
	buf: [64]byte
	s := strconv.append_bits(buf[:], i, base, false, 64, strconv.digits, nil)
	return write_string(b, s)
}
/*
	writes the i64 value `i` in the given `base` (10 by default) into the builder
	returns the written amount of characters
*/
write_i64 :: proc(b: ^Builder, i: i64, base: int = 10) -> (n: int) {
	// worst case is base 2: up to 64 digits plus one byte for the '-' sign
	// a 32 byte scratch buffer is too small for base 2 and base 3
	buf: [65]byte
	s := strconv.append_bits(buf[:], u64(i), base, true, 64, strconv.digits, nil)
	return write_string(b, s)
}
// writes the uint value `i` in the given `base` (10 by default) into the builder
// widens `i` to u64 and forwards to `write_u64`, returns the written amount of characters
write_uint :: proc(b: ^Builder, i: uint, base: int = 10) -> (n: int) {
	wide := u64(i)
	n = write_u64(b, wide, base)
	return
}
// writes the int value `i` in the given `base` (10 by default) into the builder
// widens `i` to i64 and forwards to `write_i64`, returns the written amount of characters
write_int :: proc(b: ^Builder, i: int, base: int = 10) -> (n: int) {
	wide := i64(i)
	n = write_i64(b, wide, base)
	return
}

View File

@@ -58,6 +58,13 @@ to_valid_utf8 :: proc(s, replacement: string, allocator := context.allocator) ->
return to_string(b)
}
/*
returns the input string `s` with all runes set to lowered case
always allocates using the `allocator`
strings.to_lower("test") -> test
strings.to_lower("Test") -> test
*/
to_lower :: proc(s: string, allocator := context.allocator) -> string {
b: Builder
init_builder(&b, 0, len(s), allocator)
@@ -66,6 +73,14 @@ to_lower :: proc(s: string, allocator := context.allocator) -> string {
}
return to_string(b)
}
/*
returns the input string `s` with all runes set to upper case
always allocates using the `allocator`
strings.to_upper("test") -> TEST
strings.to_upper("Test") -> TEST
*/
to_upper :: proc(s: string, allocator := context.allocator) -> string {
b: Builder
init_builder(&b, 0, len(s), allocator)
@@ -75,13 +90,13 @@ to_upper :: proc(s: string, allocator := context.allocator) -> string {
return to_string(b)
}
// reports whether the rune `c` is '-', '_' or a space according to `is_space`
// useful when treating strings like words in a text editor or html paths
is_delimiter :: proc(c: rune) -> bool {
	switch c {
	case '-', '_':
		return true
	}
	return is_space(c)
}
// returns true when the `r` rune is a non alpha or `unicode.is_space` rune
is_separator :: proc(r: rune) -> bool {
if r <= 0x7f {
switch r {
@@ -101,7 +116,10 @@ is_separator :: proc(r: rune) -> bool {
return unicode.is_space(r)
}
/*
iterator that loops through the string and calls the callback with the `prev`, `curr` and `next` rune
on empty string `s` the callback gets called once with empty runes
*/
string_case_iterator :: proc(w: io.Writer, s: string, callback: proc(w: io.Writer, prev, curr, next: rune)) {
prev, curr: rune
for next in s {
@@ -122,8 +140,9 @@ string_case_iterator :: proc(w: io.Writer, s: string, callback: proc(w: io.Write
}
}
// alias: `to_lower_camel_case` is the same procedure as `to_camel_case`
to_lower_camel_case :: to_camel_case
// converts the `s` string to "lowerCamelCase"
to_camel_case :: proc(s: string, allocator := context.allocator) -> string {
s := s
s = trim_space(s)
@@ -147,6 +166,8 @@ to_camel_case :: proc(s: string, allocator := context.allocator) -> string {
}
// alias: `to_upper_camel_case` is the same procedure as `to_pascal_case`
to_upper_camel_case :: to_pascal_case
// converts the `s` string to "PascalCase"
to_pascal_case :: proc(s: string, allocator := context.allocator) -> string {
s := s
s = trim_space(s)
@@ -169,6 +190,15 @@ to_pascal_case :: proc(s: string, allocator := context.allocator) -> string {
return to_string(b)
}
/*
returns the `s` string as words separated by the given `delimiter` rune
all runes will be upper or lowercased based on the `all_upper_case` bool
strings.to_delimiter_case("Hello World", '_', false) -> hello_world
strings.to_delimiter_case("Hello World", ' ', true) -> HELLO WORLD
strings.to_delimiter_case("Hello World", ' ', true) -> HELLO WORLD
strings.to_delimiter_case("aBC", '_', false) -> a_b_c
*/
to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allocator := context.allocator) -> string {
s := s
s = trim_space(s)
@@ -208,24 +238,34 @@ to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allo
return to_string(b)
}
/*
	converts the `s` string to "snake_case" with all runes lowercased
	forwards to `to_delimiter_case` with '_' as the delimiter

	strings.to_snake_case("HelloWorld") -> hello_world
	strings.to_snake_case("Hello World") -> hello_world
*/
to_snake_case :: proc(s: string, allocator := context.allocator) -> string {
	DELIMITER :: '_'
	ALL_UPPER_CASE :: false
	return to_delimiter_case(s, DELIMITER, ALL_UPPER_CASE, allocator)
}
// alias: `to_screaming_snake_case` is the same procedure as `to_upper_snake_case`
to_screaming_snake_case :: to_upper_snake_case

// converts the `s` string to "SNAKE_CASE" with all runes uppercased
// forwards to `to_delimiter_case` with '_' as the delimiter
to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> string {
	DELIMITER :: '_'
	ALL_UPPER_CASE :: true
	return to_delimiter_case(s, DELIMITER, ALL_UPPER_CASE, allocator)
}
// converts the `s` string to "kebab-case" with all runes lowercased
// forwards to `to_delimiter_case` with '-' as the delimiter
to_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
	DELIMITER :: '-'
	ALL_UPPER_CASE :: false
	return to_delimiter_case(s, DELIMITER, ALL_UPPER_CASE, allocator)
}
// converts the `s` string to "KEBAB-CASE" with all runes uppercased
// forwards to `to_delimiter_case` with '-' as the delimiter
to_upper_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
	return to_delimiter_case(s, '-', true, allocator)
}
// converts the `s` string to "Ada_case"
to_ada_case :: proc(s: string, allocator := context.allocator) -> string {
delimiter :: '_'

View File

@@ -2,21 +2,26 @@ package strings
import "core:mem"
// custom string entry struct
// `intern_get` reads `len` bytes starting at `str`, so the bytes extend past the declared [1]byte
Intern_Entry :: struct {
len: int, // byte length of the stored string
str: [1]byte, // string is allocated inline with the entry to keep allocations simple
}
// "intern" is a more memory efficient string map
// `allocator` is used to allocate the actual `Intern_Entry` strings
Intern :: struct {
allocator: mem.Allocator, // set by `intern_init`, used by `intern_destroy` to free the entries
entries: map[string]^Intern_Entry, // lookup from string to its entry
}
// prepares the intern map `m` for use
// `allocator` is stored for the string entries, `map_allocator` backs the entries map itself
// the map is created with an initial capacity of 16
intern_init :: proc(m: ^Intern, allocator := context.allocator, map_allocator := context.allocator) {
	INITIAL_CAP :: 16
	m.entries = make(map[string]^Intern_Entry, INITIAL_CAP, map_allocator)
	m.allocator = allocator
}
// free the map and all its content allocated using the `.allocator`
intern_destroy :: proc(m: ^Intern) {
for _, value in m.entries {
free(value, m.allocator)
@@ -24,15 +29,22 @@ intern_destroy :: proc(m: ^Intern) {
delete(m.entries)
}
// returns the `text` string from the intern map - it gets set if it didn't exist yet
// the returned string lives as long as the map entry lives
intern_get :: proc(m: ^Intern, text: string) -> string #no_bounds_check {
	found := _intern_get_entry(m, text)
	// the bytes live inline behind `str`, so slicing past the declared [1]byte is intended
	bytes := found.str[:found.len]
	return string(bytes)
}
// returns the `text` cstring from the intern map - it gets set if it didn't exist yet
// the returned cstring lives as long as the map entry lives
intern_get_cstring :: proc(m: ^Intern, text: string) -> cstring {
	found := _intern_get_entry(m, text)
	first := &found.str[0]
	return cstring(first)
}
// looks up whether the `text` string exists in the map, returns the entry
// sets & allocates the entry if it wasn't set yet
_intern_get_entry :: proc(m: ^Intern, text: string) -> ^Intern_Entry #no_bounds_check {
if prev, ok := m.entries[text]; ok {
return prev

View File

@@ -3,46 +3,60 @@ package strings
import "core:io"
import "core:unicode/utf8"
/*
io stream data for a string reader that can read based on bytes or runes
implements the vtable when using the io.Reader variants
"read" calls advance the current reading offset `i`
*/
Reader :: struct {
s: string, // read-only buffer
i: i64, // current reading index
prev_rune: int, // previous reading index of rune or < 0, `reader_init` starts it at -1
}
// points the reader at the string `s` and rewinds it
// the reading index starts at 0 and there is no previous rune (-1)
reader_init :: proc(r: ^Reader, s: string) {
	r^ = Reader{
		s = s,
		i = 0,
		prev_rune = -1,
	}
}
// wraps the reader in an `io.Stream` that uses the reader vtable
// the stream only stores the pointer `r`, so the reader has to outlive it
reader_to_stream :: proc(r: ^Reader) -> (s: io.Stream) {
	s = io.Stream{stream_vtable = _reader_vtable, stream_data = r}
	return
}
// initializes the reader `r` with the string `s` and returns it as an io.Reader
// the ok result of the conversion is discarded
to_reader :: proc(r: ^Reader, s: string) -> io.Reader {
	reader_init(r, s)
	stream := reader_to_stream(r)
	result, _ := io.to_reader(stream)
	return result
}
// initializes the reader `r` with the string `s` and returns it as an io.Reader_At
// the ok result of the conversion is discarded
to_reader_at :: proc(r: ^Reader, s: string) -> io.Reader_At {
	reader_init(r, s)
	stream := reader_to_stream(r)
	result, _ := io.to_reader_at(stream)
	return result
}
// initializes the reader `r` with the string `s` and returns it as an io.Byte_Reader
// the ok result of the conversion is discarded
to_byte_reader :: proc(r: ^Reader, s: string) -> io.Byte_Reader {
	reader_init(r, s)
	stream := reader_to_stream(r)
	result, _ := io.to_byte_reader(stream)
	return result
}
// initializes the reader `r` with the string `s` and returns it as an io.Rune_Reader
// the ok result of the conversion is discarded
to_rune_reader :: proc(r: ^Reader, s: string) -> io.Rune_Reader {
	reader_init(r, s)
	stream := reader_to_stream(r)
	result, _ := io.to_rune_reader(stream)
	return result
}
// remaining length of the reader
reader_length :: proc(r: ^Reader) -> int {
if r.i >= i64(len(r.s)) {
return 0
@@ -50,10 +64,13 @@ reader_length :: proc(r: ^Reader) -> int {
return int(i64(len(r.s)) - r.i)
}
// returns the full byte length of the string held by the reader
// the current reading index `i` is not taken into account
reader_size :: proc(r: ^Reader) -> i64 {
	total := len(r.s)
	return i64(total)
}
// reads len(p) bytes into the slice from the string in the reader
// returns `n` amount of read bytes and an io.Error
reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) {
if r.i >= i64(len(r.s)) {
return 0, .EOF
@@ -63,6 +80,9 @@ reader_read :: proc(r: ^Reader, p: []byte) -> (n: int, err: io.Error) {
r.i += i64(n)
return
}
// reads len(p) bytes into the slice from the string in the reader at an offset
// returns `n` amount of read bytes and an io.Error
reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Error) {
if off < 0 {
return 0, .Invalid_Offset
@@ -76,6 +96,8 @@ reader_read_at :: proc(r: ^Reader, p: []byte, off: i64) -> (n: int, err: io.Erro
}
return
}
// reads and returns a single byte - error when out of bounds
reader_read_byte :: proc(r: ^Reader) -> (byte, io.Error) {
r.prev_rune = -1
if r.i >= i64(len(r.s)) {
@@ -85,6 +107,8 @@ reader_read_byte :: proc(r: ^Reader) -> (byte, io.Error) {
r.i += 1
return b, nil
}
// decreases the reader offset - error when below 0
reader_unread_byte :: proc(r: ^Reader) -> io.Error {
if r.i <= 0 {
return .Invalid_Unread
@@ -93,6 +117,8 @@ reader_unread_byte :: proc(r: ^Reader) -> io.Error {
r.i -= 1
return nil
}
// reads and returns a single rune and the rune size - error when out of bounds
reader_read_rune :: proc(r: ^Reader) -> (ch: rune, size: int, err: io.Error) {
if r.i >= i64(len(r.s)) {
r.prev_rune = -1
@@ -107,6 +133,9 @@ reader_read_rune :: proc(r: ^Reader) -> (ch: rune, size: int, err: io.Error) {
r.i += i64(size)
return
}
// decreases the reader offset by the last rune
// can only be used once and after a valid read_rune call
reader_unread_rune :: proc(r: ^Reader) -> io.Error {
if r.i <= 0 {
return .Invalid_Unread
@@ -118,6 +147,8 @@ reader_unread_rune :: proc(r: ^Reader) -> io.Error {
r.prev_rune = -1
return nil
}
// seeks the reader offset to a wanted offset
reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.Error) {
r.prev_rune = -1
abs: i64
@@ -138,6 +169,8 @@ reader_seek :: proc(r: ^Reader, offset: i64, whence: io.Seek_From) -> (i64, io.E
r.i = abs
return abs, nil
}
// writes the string content left to read into the io.Writer `w`
reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
r.prev_rune = -1
if r.i >= i64(len(r.s)) {
@@ -157,7 +190,6 @@ reader_write_to :: proc(r: ^Reader, w: io.Writer) -> (n: i64, err: io.Error) {
return
}
@(private)
_reader_vtable := &io.Stream_VTable{
impl_size = proc(s: io.Stream) -> i64 {