diff --git a/core/strings/builder.odin b/core/strings/builder.odin index 21f50d823..a772af10e 100644 --- a/core/strings/builder.odin +++ b/core/strings/builder.odin @@ -8,10 +8,27 @@ Builder :: struct { buf: [dynamic]byte, } -make_builder :: proc(allocator := context.allocator) -> Builder { +make_builder_none :: proc(allocator := context.allocator) -> Builder { return Builder{make([dynamic]byte, allocator)}; } +make_builder_len :: proc(len: int, allocator := context.allocator) -> Builder { + return Builder{make([dynamic]byte, len, allocator)}; +} + +make_builder_len_cap :: proc(len, cap: int, allocator := context.allocator) -> Builder { + return Builder{make([dynamic]byte, len, cap, allocator)}; +} + +make_builder :: proc{ + make_builder_none, + make_builder_len, + make_builder_len_cap, +}; + + + + destroy_builder :: proc(b: ^Builder) { delete(b.buf); clear(&b.buf); diff --git a/core/strings/strings.odin b/core/strings/strings.odin index 2c1c769df..f89438c0a 100644 --- a/core/strings/strings.odin +++ b/core/strings/strings.odin @@ -678,8 +678,7 @@ trim_null :: proc(s: string) -> string { // Adjacent invalid bytes are only replaced once scrub :: proc(s: string, replacement: string, allocator := context.allocator) -> string { str := s; - b := make_builder(allocator);; - grow_builder(&b, len(str)); + b := make_builder(0, len(str), allocator); has_error := false; cursor := 0; @@ -708,193 +707,204 @@ scrub :: proc(s: string, replacement: string, allocator := context.allocator) -> return to_string(b); } -to_snake_case :: proc(str: string, allocator := context.allocator) -> string { - buf := make_builder(allocator); - last_chars: [2]rune; - for char, _ in str { - switch char { - case 'A'..'Z': - switch last_chars[1] { - case 'a'..'z', '0'..'9': - write_rune(&buf, '_'); - case 'A'..'Z': - write_rune(&buf, last_chars[1] + ('a'-'A')); - } - case 'a'..'z': - switch last_chars[1] { - case 'A'..'Z': - switch last_chars[0] { - case 'A'..'Z': - write_rune(&buf, '_'); - } - write_rune(&buf, last_chars[1] + ('a'-'A')); - case '0'..'9': - write_rune(&buf, '_'); - } - write_rune(&buf, char); - case '0'..'9': - switch last_chars[1] { - case 'A'..'Z': - write_rune(&buf, last_chars[1] + ('a'-'A')); - write_rune(&buf, '_'); - case 'a'..'z': - write_rune(&buf, '_'); - } - write_rune(&buf, char); - case '_': - switch last_chars[1] { - case 'A'..'Z': - write_rune(&buf, last_chars[1] + ('a'-'A')); - } - write_rune(&buf, char); - case: - unimplemented(); - } - - last_chars[0] = last_chars[1]; - last_chars[1] = char; +to_lower :: proc(s: string, allocator := context.allocator) -> string { + b := make_builder(0, len(s), allocator); + for r in s { + write_rune(&b, unicode.to_lower(r)); } - - switch last_chars[1] { - case 'A'..'Z': - write_rune(&buf, last_chars[1] + ('a'-'A')); + return to_string(b); +} +to_upper :: proc(s: string, allocator := context.allocator) -> string { + b := make_builder(0, len(s), allocator); + for r in s { + write_rune(&b, unicode.to_upper(r)); } - - return to_string(buf); + return to_string(b); } -to_ada_case :: proc(str: string, allocator := context.allocator) -> string { - buf := make_builder(allocator); - last_chars: [2]rune; - for char, _ in str { - switch char { - case 'A'..'Z': - switch last_chars[1] { - case 'a'..'z', '0'..'9': - write_rune(&buf, '_'); - case 'A'..'Z': - switch last_chars[0] { - case '_', '\x00': - write_rune(&buf, last_chars[1]); - case: - write_rune(&buf, last_chars[1] + ('a'-'A')); - } - } - case 'a'..'z': - switch last_chars[1] { - case 'A'..'Z': - switch last_chars[0] { - case 'A'..'Z': - write_rune(&buf, '_'); - write_rune(&buf, last_chars[1]); - case: - write_rune(&buf, last_chars[1]); - } - write_rune(&buf, char); - case '0'..'9': - write_rune(&buf, '_'); - write_rune(&buf, char); - case 'a'..'z': - write_rune(&buf, char); - case '_', '\x00': - write_rune(&buf, char - ('a'-'A')); - } - case '0'..'9': - switch last_chars[1] { - case 'A'..'Z': - write_rune(&buf, last_chars[1] + ('a'-'A')); - write_rune(&buf, '_'); - case 'a'..'z': - write_rune(&buf, '_'); - } - write_rune(&buf, char); - case '_': - switch last_chars[1] { - case 'A'..'Z': - write_rune(&buf, last_chars[1] + ('a'-'A')); - } - write_rune(&buf, char); - case: - write_rune(&buf, char); - } - last_chars[0] = last_chars[1]; - last_chars[1] = char; - } - switch last_chars[1] { - case 'A'..'Z': - write_rune(&buf, last_chars[1] + ('a'-'A')); - } - - return to_string(buf); +is_delimiter :: proc(c: rune) -> bool { + return c == '-' || c == '_' || is_space(c); } -to_screaming_snake_case :: proc(str: string, allocator := context.allocator) -> string { - buf := make_builder(allocator); +is_separator :: proc(r: rune) -> bool { + if r <= 0x7f { + switch r { + case '0'..'9': return false; + case 'a'..'z': return false; + case 'A'..'Z': return false; + case '_': return false; + } + return true; + } - last_chars: [2]rune; - for char, _ in str { - switch char { - case 'A'..'Z': - switch last_chars[1] { - case 'a'..'z', '0'..'9': - write_rune(&buf, '_'); - case 'A'..'Z': - write_rune(&buf, last_chars[1]); - } - case 'a'..'z': - switch last_chars[1] { - case 'A'..'Z': - switch last_chars[0] { - case 'A'..'Z': - write_rune(&buf, '_'); - write_rune(&buf, last_chars[1]); - case: - write_rune(&buf, last_chars[1]); - } - write_rune(&buf, char - ('a'-'A')); - case '0'..'9': - write_rune(&buf, '_'); - write_rune(&buf, char - ('a'-'A')); - case 'a'..'z': - write_rune(&buf, char - ('a'-'A')); - case '_', '\x00': - write_rune(&buf, char - ('a'-'A')); - } - case '0'..'9': - switch last_chars[1] { - case 'A'..'Z': - write_rune(&buf, last_chars[1]); - write_rune(&buf, '_'); - case 'a'..'z': - write_rune(&buf, '_'); - } - write_rune(&buf, char); - case '_': - switch last_chars[1] { - case 'A'..'Z': - write_rune(&buf, last_chars[1]); - } - write_rune(&buf, char); - case: - unimplemented(); + // TODO(bill): unicode categories + // if unicode.is_letter(r) || unicode.is_digit(r) { + // return false; + // } + + return unicode.is_space(r); +} + + +string_case_iterator :: proc(b: ^Builder, s: string, callback: proc(b: ^Builder, prev, curr, next: rune)) { + prev, curr: rune; + for next in s { + if curr == 0 { + prev = curr; + curr = next; + continue; } - last_chars[0] = last_chars[1]; - last_chars[1] = char; + callback(b, prev, curr, next); + + prev = curr; + curr = next; } - switch last_chars[1] { - case 'A'..'Z': - write_rune(&buf, last_chars[1]); + if len(s) > 0 { + callback(b, prev, curr, 0); } - - return to_string(buf); } + +to_lower_camel_case :: to_camel_case; +to_camel_case :: proc(s: string, allocator := context.allocator) -> string { + s := trim_space(s); + b := make_builder(0, len(s), allocator); + + string_case_iterator(&b, s, proc(b: ^Builder, prev, curr, next: rune) { + if !is_delimiter(curr) { + if is_delimiter(prev) { + write_rune(b, unicode.to_upper(curr)); + } else if unicode.is_lower(prev) { + write_rune(b, curr); + } else { + write_rune(b, unicode.to_lower(curr)); + } + } + }); + + return to_string(b); +} + +to_upper_camel_case :: to_pascal_case; +to_pascal_case :: proc(s: string, allocator := context.allocator) -> string { + s := trim_space(s); + b := make_builder(0, len(s), allocator); + + string_case_iterator(&b, s, proc(b: ^Builder, prev, curr, next: rune) { + if !is_delimiter(curr) { + if is_delimiter(prev) || prev == 0 { + write_rune(b, unicode.to_upper(curr)); + } else if unicode.is_lower(prev) { + write_rune(b, curr); + } else { + write_rune(b, unicode.to_lower(curr)); + } + } + }); + + return to_string(b); +} + +to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allocator := context.allocator) -> string { + s := trim_space(s); + b := make_builder(0, len(s), allocator); + + adjust_case := unicode.to_upper if all_upper_case else unicode.to_lower; + + prev, curr: rune; + + for next in s { + if is_delimiter(curr) { + if !is_delimiter(prev) { + write_rune(&b, delimiter); + } + } else if unicode.is_upper(curr) { + if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) { + write_rune(&b, delimiter); + } + write_rune(&b, adjust_case(curr)); + } else if curr != 0 { + write_rune(&b, adjust_case(curr)); + } + + prev = curr; + curr = next; + } + + if len(s) > 0 { + if unicode.is_upper(curr) && unicode.is_lower(prev) && prev != 0 { + write_rune(&b, delimiter); + } + write_rune(&b, adjust_case(curr)); + } + + return to_string(b); +} + + +to_snake_case :: proc(s: string, allocator := context.allocator) -> string { + return to_delimiter_case(s, '_', false, allocator); +} + +to_screaming_snake_case :: to_upper_snake_case; +to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> string { + return to_delimiter_case(s, '_', true, allocator); +} + +to_kebab_case :: proc(s: string, allocator := context.allocator) -> string { + return to_delimiter_case(s, '-', false, allocator); +} + +to_upper_case :: proc(s: string, allocator := context.allocator) -> string { + return to_delimiter_case(s, '-', true, allocator); +} + +to_ada_case :: proc(s: string, allocator := context.allocator) -> string { + delimiter :: '_'; + + s := trim_space(s); + b := make_builder(0, len(s), allocator); + + prev, curr: rune; + + for next in s { + if is_delimiter(curr) { + if !is_delimiter(prev) { + write_rune(&b, delimiter); + } + } else if unicode.is_upper(curr) { + if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) { + write_rune(&b, delimiter); + } + write_rune(&b, unicode.to_upper(curr)); + } else if curr != 0 { + write_rune(&b, unicode.to_lower(curr)); + } + + prev = curr; + curr = next; + } + + if len(s) > 0 { + if unicode.is_upper(curr) && unicode.is_lower(prev) && prev != 0 { + write_rune(&b, delimiter); + write_rune(&b, unicode.to_upper(curr)); + } else { + write_rune(&b, unicode.to_lower(curr)); + } + } + + return to_string(b); +} + + + reverse :: proc(s: string, allocator := context.allocator) -> string { str := s; n := len(str); diff --git a/core/unicode/letter.odin b/core/unicode/letter.odin index 09e6fb0ac..bd701ab85 100644 --- a/core/unicode/letter.odin +++ b/core/unicode/letter.odin @@ -105,7 +105,8 @@ is_title :: proc(r: rune) -> bool { return is_upper(r) && is_lower(r); } -is_white_space :: proc(r: rune) -> bool { +is_white_space :: is_space; +is_space :: proc(r: rune) -> bool { c := i32(r); p := binary_search(c, space_ranges[:], len(space_ranges)/2, 2); if p >= 0 && space_ranges[p] <= c && c <= space_ranges[p+1] {