Add strings.fields and strings.fields_proc

This commit is contained in:
gingerBill
2020-12-04 18:50:05 +00:00
parent fd453be831
commit 7268c80d64

View File

@@ -467,10 +467,12 @@ replace :: proc(s, old, new: string, n: int, allocator := context.allocator) ->
return;
}
// _ascii_space is a lookup table indexed by byte value:
// the entry is 1 for '\t', '\n', '\v', '\f', '\r' and ' ', and 0 for every other byte
@(private) _ascii_space := [256]u8{'\t' = 1, '\n' = 1, '\v' = 1, '\f' = 1, '\r' = 1, ' ' = 1};

// is_ascii_space reports whether r is one of the ASCII white space characters
// '\t', '\n', '\v', '\f', '\r' or ' '
// Any rune outside the single-byte range (including negative values) is never a space
is_ascii_space :: proc(r: rune) -> bool {
	// the lower bound matters: u8(r) truncates, so a negative rune could otherwise
	// alias one of the white space entries in the table
	if 0 <= r && r < utf8.RUNE_SELF {
		return _ascii_space[u8(r)] != 0;
	}
	return false;
}
@@ -949,3 +951,94 @@ write_pad_string :: proc(w: io.Writer, pad: string, pad_len, remains: int) {
p = p[width:];
}
}
// fields breaks s into the pieces separated by runs of one or more white space characters
// (white space as defined by unicode.is_space)
// The result is a slice of substrings of s; when s holds nothing but white space the result is empty (nil)
//
// Strings made up purely of single-byte characters are handled here with a byte table;
// anything containing a byte >= utf8.RUNE_SELF is handed over to fields_proc
fields :: proc(s: string, allocator := context.allocator) -> []string #no_bounds_check {
	// Pass 1: count the fields and OR all bytes together so a non-ASCII byte can be detected afterwards
	count := 0;
	prev_blank := true;
	seen := u8(0);
	for idx := 0; idx < len(s); idx += 1 {
		b := s[idx];
		seen |= b;
		blank := _ascii_space[b] != 0;
		// a field begins wherever a non-space byte follows a space (or the start of the string)
		if prev_blank && !blank {
			count += 1;
		}
		prev_blank = blank;
	}

	// some byte had its high bit set: the string is not pure ASCII, take the rune-aware path
	if seen >= utf8.RUNE_SELF {
		return fields_proc(s, unicode.is_space, allocator);
	}

	if count == 0 {
		return nil;
	}

	// Pass 2: the exact number of fields is known, so fill a fixed-size slice
	out := make([]string, count, allocator);
	filled := 0;
	pos := 0;
	for pos < len(s) {
		// skip the white space in front of the next field
		for pos < len(s) && _ascii_space[s[pos]] != 0 {
			pos += 1;
		}
		if pos >= len(s) {
			break;
		}

		// consume the field itself
		begin := pos;
		for pos < len(s) && _ascii_space[s[pos]] == 0 {
			pos += 1;
		}
		out[filled] = s[begin : pos];
		filled += 1;
	}
	return out;
}
// fields_proc splits the string s at each run of unicode code points `ch` satisfying f(ch)
// returns a slice of the substrings of s found between those runs
// If all code points in s satisfy f(ch) or string is empty, an empty slice is returned
//
// The result is built with `allocator`; its elements are slices of s, not copies
//
// fields_proc makes no guarantee about the order in which it calls f(ch)
// it assumes that `f` always returns the same value for a given ch
fields_proc :: proc(s: string, f: proc(rune) -> bool, allocator := context.allocator) -> []string #no_bounds_check {
	spans := make([dynamic]string, 0, 32, allocator);

	// byte offset where the field currently being scanned begins; negative while between fields
	start := -1;

	// iterating a string yields the decoded rune first and its byte offset second
	for r, offset in s {
		if f(r) {
			if start >= 0 {
				// a separator closes the open field
				append(&spans, s[start : offset]);
				start = -1;
			}
		} else if start < 0 {
			start = offset;
		}
	}

	// a field still open at the end of the input runs to the end of s
	if start >= 0 {
		append(&spans, s[start:]);
	}
	return spans[:];
}