mirror of
https://github.com/odin-lang/Odin.git
synced 2025-12-29 17:34:34 +00:00
156 lines
2.9 KiB
Odin
156 lines
2.9 KiB
Odin
package utf8string
|
|
|
|
import "core:unicode/utf8"
|
|
import "core:runtime"
|
|
import "core:builtin"
|
|
|
|
// String pairs a UTF-8 encoded `string` with cached indexing state used by the
// rune-indexed procedures of this package (`at`, `slice`).
String :: struct {
	// The wrapped text and the number of runes it contains.
	contents:   string,
	rune_count: int,

	// Cached information.
	//
	// non_ascii: byte index of the first non-ASCII byte in `contents`
	//            (equal to the byte length of `contents` when there is none).
	// width:     encoded size in bytes of the most recently decoded rune; 0 if ascii.
	non_ascii, width: int,

	// Cursor left behind by the last non-ASCII lookup: the byte offset and the
	// rune index of the same position in `contents`.
	byte_pos, rune_pos: int,
}
|
|
|
|
// Package-private alias for `builtin.len`, used wherever this package needs the
// byte length of a string.
// NOTE(review): presumably needed because this package declares its own `len`
// procedure (rune count of a `String`) under the same name — confirm.
@(private)
|
|
_len :: builtin.len // helper procedure
|
|
|
|
// init (re)initialises `s` to wrap `contents` and returns `s`.
//
// The cursor is reset to the start of the string. `non_ascii` becomes the index of
// the first byte that is >= utf8.RUNE_SELF, or the byte length when every byte is
// ASCII. For an all-ASCII (or empty) string the rune count is the byte length and
// `width` is 0; otherwise the runes are counted and `width` is the encoded size of
// the first rune.
init :: proc(s: ^String, contents: string) -> ^String {
	byte_len := _len(contents)

	// Find where the ASCII-only prefix ends.
	first_wide := byte_len
	for idx := 0; idx < byte_len; idx += 1 {
		if contents[idx] >= utf8.RUNE_SELF {
			first_wide = idx
			break
		}
	}

	s.contents  = contents
	s.byte_pos  = 0
	s.rune_pos  = 0
	s.non_ascii = first_wide

	if first_wide == byte_len {
		// Pure ASCII: one rune per byte, nothing to decode.
		s.rune_count = byte_len
		s.width      = 0
	} else {
		s.rune_count = utf8.rune_count_in_string(contents)
		_, s.width   = utf8.decode_rune_in_string(contents)
	}
	return s
}
|
|
|
|
// to_string returns the `contents` string stored in `s`.
to_string :: proc(s: ^String) -> string {
	text := s.contents
	return text
}
|
|
|
|
// len returns the number of runes in `s` (its cached `rune_count`), not the
// number of bytes.
len :: proc(s: ^String) -> int {
	count := s.rune_count
	return count
}
|
|
|
|
|
|
// is_ascii reports whether the cached rune width of `s` is zero, which is the
// value `init` stores for strings that contain no non-ASCII byte.
is_ascii :: proc(s: ^String) -> bool {
	if s.width != 0 {
		return false
	}
	return true
}
|
|
|
|
// at returns the rune at rune index `i` of `s`.
//
// `i` is validated against `s.rune_count` with `runtime.bounds_check_error_loc`,
// reported at `loc`.
//
// Indices inside the ASCII prefix (`i < s.non_ascii`) are answered straight from the
// byte at that index and leave the cached cursor untouched. Every other lookup moves
// the cursor so that `s.rune_pos == i`, `s.byte_pos` is the byte offset of that rune
// and `s.width` is its encoded size. The first rune, the last rune and the cursor's
// immediate neighbours are decoded without scanning; anything else is reached by a
// linear scan from whichever of the ASCII boundary, the cursor, or the end of the
// string is judged closest.
at :: proc(s: ^String, i: int, loc := #caller_location) -> (r: rune) {
	runtime.bounds_check_error_loc(loc, i, s.rune_count)

	// Before the first non-ASCII byte a rune index is also a byte index.
	if i < s.non_ascii {
		return rune(s.contents[i])
	}

	switch i {
	case 0: // first rune of the string
		r, s.width = utf8.decode_rune_in_string(s.contents)
		s.rune_pos = 0
		s.byte_pos = 0
		return

	case s.rune_count-1: // last rune of the string
		// Decode from the END of the string; decoding from the front would
		// return rune 0 and derive byte_pos from the wrong width.
		r, s.width = utf8.decode_last_rune_in_string(s.contents)
		s.rune_pos = i
		s.byte_pos = _len(s.contents) - s.width
		return

	case s.rune_pos-1: // one rune behind the cursor
		// The wanted rune is the one that ENDS at byte_pos, i.e. the last rune
		// of the prefix, not the first.
		r, s.width = utf8.decode_last_rune_in_string(s.contents[:s.byte_pos])
		s.rune_pos = i
		s.byte_pos -= s.width
		return

	case s.rune_pos+1: // one rune ahead of the cursor
		// s.width is the size of the rune under the cursor, so adding it
		// lands on the start of the next rune.
		s.rune_pos = i
		s.byte_pos += s.width
		fallthrough

	case s.rune_pos: // the rune under the cursor
		r, s.width = utf8.decode_rune_in_string(s.contents[s.byte_pos:])
		return
	}

	// Linear scan: pick a starting point and a direction.
	scan_forward := true
	if i < s.rune_pos {
		// Target lies between the ASCII boundary and the cursor.
		if i < (s.rune_pos-s.non_ascii)/2 {
			// Restart at the ASCII boundary, where byte offset == rune index.
			s.byte_pos, s.rune_pos = s.non_ascii, s.non_ascii
		} else {
			// Walk backwards from the cursor.
			scan_forward = false
		}
	} else if i-s.rune_pos < (s.rune_count-s.rune_pos)/2 {
		// Target is ahead and nearer to the cursor: walk forward from the cursor.
	} else {
		// Target is ahead and nearer to the end: walk backwards from the end.
		s.byte_pos, s.rune_pos = _len(s.contents), s.rune_count
		scan_forward = false
	}

	if scan_forward {
		for {
			r, s.width = utf8.decode_rune_in_string(s.contents[s.byte_pos:])
			if s.rune_pos == i {
				return
			}
			s.rune_pos += 1
			s.byte_pos += s.width
		}
	} else {
		for {
			r, s.width = utf8.decode_last_rune_in_string(s.contents[:s.byte_pos])
			s.rune_pos -= 1
			s.byte_pos -= s.width
			if s.rune_pos == i {
				return
			}
		}
	}
}
|
|
|
|
// slice returns the substring of `s` covering rune indices [i, j).
//
// The pair (i, j) is validated against `s.rune_count` with
// `runtime.slice_expr_error_lo_hi_loc`, reported at `loc`. Rune indices are
// translated to byte offsets: directly inside the ASCII prefix, and through `at`
// (which leaves the matching byte offset in `s.byte_pos`) elsewhere.
slice :: proc(s: ^String, i, j: int, loc := #caller_location) -> string {
	runtime.slice_expr_error_lo_hi_loc(loc, i, j, s.rune_count)

	// Whole range sits in the ASCII prefix: rune indices are byte offsets.
	if j < s.non_ascii {
		return s.contents[i:j]
	}
	if i == j {
		return ""
	}

	total_bytes := _len(s.contents)

	// Byte offset of the first rune of the range.
	byte_lo: int
	switch {
	case i < s.non_ascii:
		byte_lo = i
	case i == s.rune_count:
		byte_lo = total_bytes
	case:
		at(s, i, loc)
		byte_lo = s.byte_pos
	}

	// Byte offset just past the last rune of the range.
	byte_hi := total_bytes
	if j != s.rune_count {
		at(s, j, loc)
		byte_hi = s.byte_pos
	}

	return s.contents[byte_lo:byte_hi]
}
|