Files
Odin/core/unicode/utf8/utf8string/string.odin

156 lines
2.9 KiB
Odin

package utf8string
import "core:unicode/utf8"
import "core:runtime"
import "builtin"
String :: struct {
contents: string,
rune_count: int,
// cached information
non_ascii: int, // index to non-ascii code points
width: int, // 0 if ascii
byte_pos: int,
rune_pos: int,
}
@(private)
_len :: builtin.len; // helper procedure
init :: proc(s: ^String, contents: string) -> ^String {
s.contents = contents;
s.byte_pos = 0;
s.rune_pos = 0;
for i in 0..<_len(contents) {
if contents[i] >= utf8.RUNE_SELF {
s.rune_count = utf8.rune_count_in_string(contents);
_, s.width = utf8.decode_rune_in_string(contents);
s.non_ascii = i;
return s;
}
}
s.rune_count = _len(contents);
s.width = 0;
s.non_ascii = _len(contents);
return s;
}
to_string :: proc(s: ^String) -> string {
return s.contents;
}
len :: proc(s: ^String) -> int {
return s.rune_count;
}
is_ascii :: proc(s: ^String) -> bool {
return s.width == 0;
}
at :: proc(s: ^String, i: int, loc := #caller_location) -> (r: rune) {
runtime.bounds_check_error_loc(loc, i, s.rune_count);
if i < s.non_ascii {
return rune(s.contents[i]);
}
switch i {
case 0:
r, s.width = utf8.decode_rune_in_string(s.contents);
s.rune_pos = 0;
s.byte_pos = 0;
return;
case s.rune_count-1:
r, s.width = utf8.decode_rune_in_string(s.contents);
s.rune_pos = i;
s.byte_pos = _len(s.contents) - s.width;
return;
case s.rune_pos-1:
r, s.width = utf8.decode_rune_in_string(s.contents[0:s.byte_pos]);
s.rune_pos = i;
s.byte_pos -= s.width;
return;
case s.rune_pos+1:
s.rune_pos = i;
s.byte_pos += s.width;
fallthrough;
case s.rune_pos:
r, s.width = utf8.decode_rune_in_string(s.contents[s.byte_pos:]);
return;
}
// Linear scan
scan_forward := true;
if i < s.rune_pos {
if i < (s.rune_pos-s.non_ascii)/2 {
s.byte_pos, s.rune_pos = s.non_ascii, s.non_ascii;
} else {
scan_forward = false;
}
} else if i-s.rune_pos < (s.rune_count-s.rune_pos)/2 {
// scan_forward = true;
} else {
s.byte_pos, s.rune_pos = _len(s.contents), s.rune_count;
scan_forward = false;
}
if scan_forward {
for {
r, s.width = utf8.decode_rune_in_string(s.contents[s.byte_pos:]);
if s.rune_pos == i {
return;
}
s.rune_pos += 1;
s.byte_pos += s.width;
}
} else {
for {
r, s.width = utf8.decode_last_rune_in_string(s.contents[:s.byte_pos]);
s.rune_pos -= 1;
s.byte_pos -= s.width;
if s.rune_pos == i {
return;
}
}
}
}
slice :: proc(s: ^String, i, j: int, loc := #caller_location) -> string {
runtime.slice_expr_error_lo_hi_loc(loc, i, j, s.rune_count);
if j < s.non_ascii {
return s.contents[i:j];
}
if i == j {
return "";
}
lo, hi: int;
if i < s.non_ascii {
lo = i;
} else if i == s.rune_count {
lo = _len(s.contents);
} else {
at(s, i, loc);
lo = s.byte_pos;
}
if j == s.rune_count {
hi = _len(s.contents);
} else {
at(s, j, loc);
hi = s.byte_pos;
}
return s.contents[lo:hi];
}