Update strings case converters to be Unicode-compliant

This commit is contained in:
gingerBill
2020-05-24 17:50:27 +01:00
parent e42f7008fc
commit f06efffe22
3 changed files with 201 additions and 173 deletions

View File

@@ -8,10 +8,27 @@ Builder :: struct {
buf: [dynamic]byte,
}
make_builder :: proc(allocator := context.allocator) -> Builder {
// make_builder_none returns a Builder whose byte buffer is a dynamic array
// created with `allocator` and no explicit length or capacity.
make_builder_none :: proc(allocator := context.allocator) -> Builder {
	bytes := make([dynamic]byte, allocator);
	return Builder{buf = bytes};
}
// make_builder_len returns a Builder whose byte buffer is a dynamic array
// created with `allocator` and an initial length of `len`.
make_builder_len :: proc(len: int, allocator := context.allocator) -> Builder {
	bytes := make([dynamic]byte, len, allocator);
	return Builder{buf = bytes};
}
// make_builder_len_cap returns a Builder whose byte buffer is a dynamic array
// created with `allocator`, an initial length of `len` and a capacity of `cap`.
make_builder_len_cap :: proc(len, cap: int, allocator := context.allocator) -> Builder {
	bytes := make([dynamic]byte, len, cap, allocator);
	return Builder{buf = bytes};
}
// make_builder is a procedure group over the three Builder constructors.
// The overload is chosen from the arguments supplied:
//   make_builder(allocator)           -> make_builder_none
//   make_builder(len, allocator)      -> make_builder_len
//   make_builder(len, cap, allocator) -> make_builder_len_cap
// In every overload `allocator` defaults to context.allocator.
make_builder :: proc{
make_builder_none,
make_builder_len,
make_builder_len_cap,
};
destroy_builder :: proc(b: ^Builder) {
delete(b.buf);
clear(&b.buf);

View File

@@ -678,8 +678,7 @@ trim_null :: proc(s: string) -> string {
// Adjacent invalid bytes are only replaced once
scrub :: proc(s: string, replacement: string, allocator := context.allocator) -> string {
str := s;
b := make_builder(allocator);;
grow_builder(&b, len(str));
b := make_builder(0, len(str), allocator);
has_error := false;
cursor := 0;
@@ -708,193 +707,204 @@ scrub :: proc(s: string, replacement: string, allocator := context.allocator) ->
return to_string(b);
}
to_snake_case :: proc(str: string, allocator := context.allocator) -> string {
buf := make_builder(allocator);
last_chars: [2]rune;
for char, _ in str {
switch char {
case 'A'..'Z':
switch last_chars[1] {
case 'a'..'z', '0'..'9':
write_rune(&buf, '_');
case 'A'..'Z':
write_rune(&buf, last_chars[1] + ('a'-'A'));
}
case 'a'..'z':
switch last_chars[1] {
case 'A'..'Z':
switch last_chars[0] {
case 'A'..'Z':
write_rune(&buf, '_');
}
write_rune(&buf, last_chars[1] + ('a'-'A'));
case '0'..'9':
write_rune(&buf, '_');
}
write_rune(&buf, char);
case '0'..'9':
switch last_chars[1] {
case 'A'..'Z':
write_rune(&buf, last_chars[1] + ('a'-'A'));
write_rune(&buf, '_');
case 'a'..'z':
write_rune(&buf, '_');
}
write_rune(&buf, char);
case '_':
switch last_chars[1] {
case 'A'..'Z':
write_rune(&buf, last_chars[1] + ('a'-'A'));
}
write_rune(&buf, char);
case:
unimplemented();
}
last_chars[0] = last_chars[1];
last_chars[1] = char;
to_lower :: proc(s: string, allocator := context.allocator) -> string {
b := make_builder(0, len(s), allocator);
for r in s {
write_rune(&b, unicode.to_lower(r));
}
switch last_chars[1] {
case 'A'..'Z':
write_rune(&buf, last_chars[1] + ('a'-'A'));
return to_string(b);
}
to_upper :: proc(s: string, allocator := context.allocator) -> string {
b := make_builder(0, len(s), allocator);
for r in s {
write_rune(&b, unicode.to_upper(r));
}
return to_string(buf);
return to_string(b);
}
to_ada_case :: proc(str: string, allocator := context.allocator) -> string {
buf := make_builder(allocator);
last_chars: [2]rune;
for char, _ in str {
switch char {
case 'A'..'Z':
switch last_chars[1] {
case 'a'..'z', '0'..'9':
write_rune(&buf, '_');
case 'A'..'Z':
switch last_chars[0] {
case '_', '\x00':
write_rune(&buf, last_chars[1]);
case:
write_rune(&buf, last_chars[1] + ('a'-'A'));
}
}
case 'a'..'z':
switch last_chars[1] {
case 'A'..'Z':
switch last_chars[0] {
case 'A'..'Z':
write_rune(&buf, '_');
write_rune(&buf, last_chars[1]);
case:
write_rune(&buf, last_chars[1]);
}
write_rune(&buf, char);
case '0'..'9':
write_rune(&buf, '_');
write_rune(&buf, char);
case 'a'..'z':
write_rune(&buf, char);
case '_', '\x00':
write_rune(&buf, char - ('a'-'A'));
}
case '0'..'9':
switch last_chars[1] {
case 'A'..'Z':
write_rune(&buf, last_chars[1] + ('a'-'A'));
write_rune(&buf, '_');
case 'a'..'z':
write_rune(&buf, '_');
}
write_rune(&buf, char);
case '_':
switch last_chars[1] {
case 'A'..'Z':
write_rune(&buf, last_chars[1] + ('a'-'A'));
}
write_rune(&buf, char);
case:
write_rune(&buf, char);
}
last_chars[0] = last_chars[1];
last_chars[1] = char;
}
switch last_chars[1] {
case 'A'..'Z':
write_rune(&buf, last_chars[1] + ('a'-'A'));
}
return to_string(buf);
// is_delimiter reports whether `c` separates words for the case converters:
// a hyphen, an underscore, or any rune for which is_space returns true.
is_delimiter :: proc(c: rune) -> bool {
	switch c {
	case '-', '_':
		return true;
	}
	return is_space(c);
}
to_screaming_snake_case :: proc(str: string, allocator := context.allocator) -> string {
buf := make_builder(allocator);
is_separator :: proc(r: rune) -> bool {
if r <= 0x7f {
switch r {
case '0'..'9': return false;
case 'a'..'z': return false;
case 'A'..'Z': return false;
case '_': return false;
}
return true;
}
last_chars: [2]rune;
for char, _ in str {
switch char {
case 'A'..'Z':
switch last_chars[1] {
case 'a'..'z', '0'..'9':
write_rune(&buf, '_');
case 'A'..'Z':
write_rune(&buf, last_chars[1]);
}
case 'a'..'z':
switch last_chars[1] {
case 'A'..'Z':
switch last_chars[0] {
case 'A'..'Z':
write_rune(&buf, '_');
write_rune(&buf, last_chars[1]);
case:
write_rune(&buf, last_chars[1]);
}
write_rune(&buf, char - ('a'-'A'));
case '0'..'9':
write_rune(&buf, '_');
write_rune(&buf, char - ('a'-'A'));
case 'a'..'z':
write_rune(&buf, char - ('a'-'A'));
case '_', '\x00':
write_rune(&buf, char - ('a'-'A'));
}
case '0'..'9':
switch last_chars[1] {
case 'A'..'Z':
write_rune(&buf, last_chars[1]);
write_rune(&buf, '_');
case 'a'..'z':
write_rune(&buf, '_');
}
write_rune(&buf, char);
case '_':
switch last_chars[1] {
case 'A'..'Z':
write_rune(&buf, last_chars[1]);
}
write_rune(&buf, char);
case:
unimplemented();
// TODO(bill): unicode categories
// if unicode.is_letter(r) || unicode.is_digit(r) {
// return false;
// }
return unicode.is_space(r);
}
string_case_iterator :: proc(b: ^Builder, s: string, callback: proc(b: ^Builder, prev, curr, next: rune)) {
prev, curr: rune;
for next in s {
if curr == 0 {
prev = curr;
curr = next;
continue;
}
last_chars[0] = last_chars[1];
last_chars[1] = char;
callback(b, prev, curr, next);
prev = curr;
curr = next;
}
switch last_chars[1] {
case 'A'..'Z':
write_rune(&buf, last_chars[1]);
if len(s) > 0 {
callback(b, prev, curr, 0);
}
return to_string(buf);
}
// to_lower_camel_case is an alias of to_camel_case.
to_lower_camel_case :: to_camel_case;

// to_camel_case returns a newly built copy of `s` (after trim_space) in which:
//   - delimiter runes (see is_delimiter) are dropped,
//   - a rune that follows a delimiter is upper-cased,
//   - a rune that follows a lowercase rune is copied unchanged,
//   - every other rune is lower-cased.
// The result is built in a Builder created with `allocator`.
to_camel_case :: proc(s: string, allocator := context.allocator) -> string {
	// Invoked by string_case_iterator for each rune together with its neighbours.
	emit_rune :: proc(b: ^Builder, prev, curr, next: rune) {
		if is_delimiter(curr) {
			return;
		}
		switch {
		case is_delimiter(prev):
			write_rune(b, unicode.to_upper(curr));
		case unicode.is_lower(prev):
			write_rune(b, curr);
		case:
			write_rune(b, unicode.to_lower(curr));
		}
	}

	trimmed := trim_space(s);
	out := make_builder(0, len(trimmed), allocator);
	string_case_iterator(&out, trimmed, emit_rune);
	return to_string(out);
}
// to_upper_camel_case is an alias of to_pascal_case.
to_upper_camel_case :: to_pascal_case;

// to_pascal_case returns a newly built copy of `s` (after trim_space) in which:
//   - delimiter runes (see is_delimiter) are dropped,
//   - a rune that follows a delimiter, or whose previous rune is 0,
//     is upper-cased,
//   - a rune that follows a lowercase rune is copied unchanged,
//   - every other rune is lower-cased.
// The result is built in a Builder created with `allocator`.
to_pascal_case :: proc(s: string, allocator := context.allocator) -> string {
	// Invoked by string_case_iterator for each rune together with its neighbours.
	emit_rune :: proc(b: ^Builder, prev, curr, next: rune) {
		if is_delimiter(curr) {
			return;
		}
		switch {
		case is_delimiter(prev) || prev == 0:
			write_rune(b, unicode.to_upper(curr));
		case unicode.is_lower(prev):
			write_rune(b, curr);
		case:
			write_rune(b, unicode.to_lower(curr));
		}
	}

	trimmed := trim_space(s);
	out := make_builder(0, len(trimmed), allocator);
	string_case_iterator(&out, trimmed, emit_rune);
	return to_string(out);
}
// to_delimiter_case rewrites `s` (after trim_space) as words joined by `delimiter`.
//
// Inputs:
//   s              - text to convert
//   delimiter      - rune written between words
//   all_upper_case - true: every letter is upper-cased; false: lower-cased
//   allocator      - used for the Builder that holds the result
//
// Rules, applied to each rune `curr` with its neighbours `prev` and `next`
// (0 means "no neighbour"):
//   - a run of delimiter runes (see is_delimiter) becomes a single `delimiter`,
//   - an uppercase rune starts a new word, and is preceded by `delimiter`, when
//     it follows a lowercase rune, or follows an uppercase rune and is itself
//     followed by a lowercase rune (so "HTTPServer" splits as HTTP|Server),
//   - every non-delimiter rune is written through the chosen case mapping.
//
// NOTE: 0 is used as the "no rune yet" sentinel, so NUL runes inside `s`
// are skipped by the loop.
to_delimiter_case :: proc(s: string, delimiter: rune, all_upper_case: bool, allocator := context.allocator) -> string {
	s := trim_space(s);
	b := make_builder(0, len(s), allocator);

	adjust_case := unicode.to_upper if all_upper_case else unicode.to_lower;

	// The loop runs one rune behind the iterator so that `next` is available
	// as look-ahead; the last rune is flushed after the loop.
	prev, curr: rune;

	for next in s {
		if is_delimiter(curr) {
			if !is_delimiter(prev) {
				write_rune(&b, delimiter);
			}
		} else if unicode.is_upper(curr) {
			if unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)) {
				write_rune(&b, delimiter);
			}
			write_rune(&b, adjust_case(curr));
		} else if curr != 0 {
			write_rune(&b, adjust_case(curr));
		}

		prev = curr;
		curr = next;
	}

	if len(s) > 0 {
		// Flush the final rune with the same rules as the loop (next == 0).
		// A trailing delimiter rune must be normalised to `delimiter` and
		// collapsed with a preceding one, not copied through verbatim.
		if is_delimiter(curr) {
			if !is_delimiter(prev) {
				write_rune(&b, delimiter);
			}
		} else {
			if unicode.is_upper(curr) && unicode.is_lower(prev) {
				write_rune(&b, delimiter);
			}
			write_rune(&b, adjust_case(curr));
		}
	}

	return to_string(b);
}
// to_snake_case converts `s` with to_delimiter_case, using '_' between words
// and lower-casing every letter. The result is built with `allocator`.
to_snake_case :: proc(s: string, allocator := context.allocator) -> string {
	SEPARATOR :: '_';
	UPPER     :: false;
	return to_delimiter_case(s, SEPARATOR, UPPER, allocator);
}
// to_screaming_snake_case is an alias of to_upper_snake_case.
to_screaming_snake_case :: to_upper_snake_case;

// to_upper_snake_case converts `s` with to_delimiter_case, using '_' between
// words and upper-casing every letter. The result is built with `allocator`.
to_upper_snake_case :: proc(s: string, allocator := context.allocator) -> string {
	SEPARATOR :: '_';
	UPPER     :: true;
	return to_delimiter_case(s, SEPARATOR, UPPER, allocator);
}
// to_kebab_case converts `s` with to_delimiter_case, using '-' between words
// and lower-casing every letter. The result is built with `allocator`.
to_kebab_case :: proc(s: string, allocator := context.allocator) -> string {
	SEPARATOR :: '-';
	UPPER     :: false;
	return to_delimiter_case(s, SEPARATOR, UPPER, allocator);
}
// to_upper_case converts `s` with to_delimiter_case, using '-' between words
// and upper-casing every letter, i.e. it produces UPPER-KEBAB-CASE.
// NOTE(review): despite the name, this is not a plain upper-casing of `s`;
// use to_upper for that.
to_upper_case :: proc(s: string, allocator := context.allocator) -> string {
	SEPARATOR :: '-';
	UPPER     :: true;
	return to_delimiter_case(s, SEPARATOR, UPPER, allocator);
}
// to_ada_case rewrites `s` (after trim_space) in Ada_Case: words are joined by
// '_', the first rune of every word is upper-cased and the remaining runes of
// the word are lower-cased.
//
//   "hello world" -> "Hello_World"
//   "helloWorld"  -> "Hello_World"
//   "HTTPServer"  -> "Http_Server"
//   "HELLO"       -> "Hello"
//
// A word starts at the beginning of the string, after a run of delimiter runes
// (see is_delimiter), or at an uppercase rune that follows a lowercase rune or
// that follows an uppercase rune and is itself followed by a lowercase rune.
// A run of delimiter runes is written as a single '_'.
// The result is built in a Builder created with `allocator`.
//
// NOTE: 0 is used as the "no rune yet" sentinel, so NUL runes inside `s`
// are skipped by the loop.
to_ada_case :: proc(s: string, allocator := context.allocator) -> string {
	// Writes the output for `curr` given its neighbours (0 = no neighbour).
	// Kept in one place so the last rune follows exactly the same rules as
	// every other rune.
	write_ada_rune :: proc(b: ^Builder, prev, curr, next: rune) {
		delimiter :: '_';

		if is_delimiter(curr) {
			if !is_delimiter(prev) {
				write_rune(b, delimiter);
			}
			return;
		}

		// Start of the string or first rune after a delimiter: the separator
		// (if any) has already been written.
		at_start := prev == 0 || is_delimiter(prev);

		// lower->Upper boundary, or the last capital of an acronym that begins
		// the next word: a separator has to be inserted here.
		case_boundary := unicode.is_upper(curr) &&
		                 (unicode.is_lower(prev) || (unicode.is_upper(prev) && unicode.is_lower(next)));

		if case_boundary {
			write_rune(b, delimiter);
		}
		if at_start || case_boundary {
			write_rune(b, unicode.to_upper(curr));
		} else {
			write_rune(b, unicode.to_lower(curr));
		}
	}

	s := trim_space(s);
	b := make_builder(0, len(s), allocator);

	// Run one rune behind the iterator so `next` is available as look-ahead.
	prev, curr: rune;

	for next in s {
		if curr != 0 {
			write_ada_rune(&b, prev, curr, next);
		}
		prev = curr;
		curr = next;
	}

	if len(s) > 0 {
		// Flush the final rune; it has no successor.
		write_ada_rune(&b, prev, curr, 0);
	}

	return to_string(b);
}
reverse :: proc(s: string, allocator := context.allocator) -> string {
str := s;
n := len(str);

View File

@@ -105,7 +105,8 @@ is_title :: proc(r: rune) -> bool {
return is_upper(r) && is_lower(r);
}
is_white_space :: proc(r: rune) -> bool {
is_white_space :: is_space;
is_space :: proc(r: rune) -> bool {
c := i32(r);
p := binary_search(c, space_ranges[:], len(space_ranges)/2, 2);
if p >= 0 && space_ranges[p] <= c && c <= space_ranges[p+1] {