mirror of
https://github.com/odin-lang/Odin.git
synced 2026-04-19 04:50:29 +00:00
add proper unicode walking
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
package strlib
|
||||
|
||||
import "core:unicode"
|
||||
import "core:unicode/utf8"
|
||||
import "core:strings"
|
||||
|
||||
MAXCAPTURES :: 32
|
||||
@@ -10,7 +12,7 @@ Capture :: struct {
|
||||
}
|
||||
|
||||
Match :: struct {
|
||||
start, end: int,
|
||||
byte_start, byte_end: int,
|
||||
}
|
||||
|
||||
Error :: enum {
|
||||
@@ -20,6 +22,7 @@ Error :: enum {
|
||||
Invalid_Pattern_Capture,
|
||||
Unfinished_Capture,
|
||||
Malformed_Pattern,
|
||||
Rune_Error,
|
||||
}
|
||||
|
||||
L_ESC :: '%'
|
||||
@@ -34,8 +37,8 @@ MatchState :: struct {
|
||||
capture: [MAXCAPTURES]Capture,
|
||||
}
|
||||
|
||||
match_class :: proc(c: u8, cl: u8) -> (res: bool) {
|
||||
switch tolower(cl) {
|
||||
match_class :: proc(c: rune, cl: rune) -> (res: bool) {
|
||||
switch unicode.to_lower(cl) {
|
||||
case 'a': res = isalpha(c)
|
||||
case 'c': res = iscntrl(c)
|
||||
case 'd': res = isdigit(c)
|
||||
@@ -52,53 +55,92 @@ match_class :: proc(c: u8, cl: u8) -> (res: bool) {
|
||||
return islower(cl) ? res : !res
|
||||
}
|
||||
|
||||
isalpha :: proc(c: u8) -> bool {
|
||||
return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
|
||||
isalpha :: proc(c: rune) -> bool {
|
||||
return unicode.is_alpha(c)
|
||||
}
|
||||
|
||||
isdigit :: proc(c: u8) -> bool {
|
||||
return '0' <= c && c <= '9'
|
||||
isdigit :: proc(c: rune) -> bool {
|
||||
return unicode.is_digit(c)
|
||||
}
|
||||
|
||||
isalnum :: proc(c: u8) -> bool {
|
||||
return isalpha(c) || isdigit(c)
|
||||
isalnum :: proc(c: rune) -> bool {
|
||||
return unicode.is_alpha(c) || unicode.is_digit(c)
|
||||
}
|
||||
|
||||
iscntrl :: proc(c: u8) -> bool {
|
||||
return c <= '\007' || (c >= '\010' && c <= '\017') || (c >= '\020' && c <= '\027') || (c >= '\030' && c <= '\037') || c == '\177'
|
||||
iscntrl :: proc(c: rune) -> bool {
|
||||
return unicode.is_control(c)
|
||||
}
|
||||
|
||||
islower :: proc(c: u8) -> bool {
|
||||
return c >= 'a' && c <= 'z'
|
||||
islower :: proc(c: rune) -> bool {
|
||||
return unicode.is_lower(c)
|
||||
}
|
||||
|
||||
isupper :: proc(c: u8) -> bool {
|
||||
return c >= 'A' && c <= 'Z'
|
||||
isupper :: proc(c: rune) -> bool {
|
||||
return unicode.is_upper(c)
|
||||
}
|
||||
|
||||
isgraph :: proc(c: u8) -> bool {
|
||||
return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
|
||||
isgraph :: proc(c: rune) -> bool {
|
||||
return unicode.is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
|
||||
}
|
||||
|
||||
ispunct :: proc(c: u8) -> bool {
|
||||
return (c >= '{' && c <= '~') || (c == '`') || (c >= '[' && c <= '_') || (c == '@') || (c >= ':' && c <= '?') || (c >= '(' && c <= '/') || (c >= '!' && c <= '\'')
|
||||
ispunct :: proc(c: rune) -> bool {
|
||||
return unicode.is_punct(c)
|
||||
}
|
||||
|
||||
isxdigit :: proc(c: u8) -> bool {
|
||||
return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
|
||||
isxdigit :: proc(c: rune) -> bool {
|
||||
return unicode.is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
|
||||
}
|
||||
|
||||
isspace :: proc(c: u8) -> bool {
|
||||
return c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r' || c == ' '
|
||||
isspace :: proc(c: rune) -> bool {
|
||||
return unicode.is_space(c)
|
||||
}
|
||||
|
||||
// ascii safe
|
||||
tolower :: proc(c: u8) -> u8 {
|
||||
if c >= 65 && c <= 90 { // upper case
|
||||
return c + 32
|
||||
utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) {
|
||||
c, size = utf8.decode_rune_in_string(bytes)
|
||||
|
||||
if c == utf8.RUNE_ERROR {
|
||||
err = .Rune_Error
|
||||
}
|
||||
|
||||
return c
|
||||
return
|
||||
}
|
||||
|
||||
// Decodes the UTF-8 codepoint that starts at byte offset `index^` of `bytes`
// and moves `index^` past it.
//
// Inputs:
// - bytes: the string being walked
// - index: in/out byte offset; it is incremented by the decoded width
//
// Returns:
// - c:   the decoded rune (utf8.RUNE_ERROR when decoding failed)
// - err: .Rune_Error when no valid codepoint could be decoded, otherwise left
//        at its zero value
utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) {
	size: int
	c, size = utf8.decode_rune_in_string(bytes[index^:])

	// The decoder signals failure as RUNE_ERROR with a width of 0 (empty input)
	// or 1 (invalid sequence). A correctly encoded U+FFFD has a width of 3 and
	// is ordinary data, so the rune value alone must not be treated as an error.
	if c == utf8.RUNE_ERROR && size <= 1 {
		err = .Rune_Error
	}

	index^ += size
	return
}
|
||||
|
||||
// Reports whether `b` is a UTF-8 continuation byte, i.e. whether its two
// highest bits are `10`.
is_cont :: proc(b: byte) -> bool {
	TOP_TWO_BITS :: 0xc0
	CONT_TAG     :: 0x80

	return (b & TOP_TWO_BITS) == CONT_TAG
}
|
||||
|
||||
// Walks backwards from byte offset `b` towards the lower bound `a`, stepping
// over trailing continuation bytes, and returns the offset one byte before the
// position where the walk stopped. Returns `a` when the walk reaches the lower
// bound.
utf8_prev :: proc(bytes: string, a, b: int) -> int {
	cursor := b

	// step back over continuation bytes, never going below `a`
	for cursor > a {
		if !is_cont(bytes[cursor - 1]) {
			break
		}
		cursor -= 1
	}

	if cursor > a {
		return cursor - 1
	}

	return a
}
|
||||
|
||||
// Starting at byte offset `a`, skips the continuation bytes that follow it and
// returns the offset just past them. The result is clamped to `len(bytes)`.
utf8_next :: proc(bytes: string, a: int) -> int {
	limit := len(bytes)
	pos := a

	// consume continuation bytes that directly follow `pos`
	for pos < limit - 1 && is_cont(bytes[pos + 1]) {
		pos += 1
	}

	if pos >= limit {
		return limit
	}

	return pos + 1
}
|
||||
|
||||
check_capture :: proc(ms: ^MatchState, l: rune) -> (int, Error) {
|
||||
@@ -125,54 +167,52 @@ capture_to_close :: proc(ms: ^MatchState) -> (int, Error) {
|
||||
return 0, .Invalid_Pattern_Capture
|
||||
}
|
||||
|
||||
classend :: proc(ms: ^MatchState, p: int) -> (int, Error) {
|
||||
ch := ms.pattern[p]
|
||||
p := p + 1
|
||||
classend :: proc(ms: ^MatchState, p: int) -> (step: int, err: Error) {
|
||||
step = p
|
||||
ch := utf8_advance(ms.pattern, &step) or_return
|
||||
|
||||
switch ch {
|
||||
case L_ESC: {
|
||||
// if > 0 {
|
||||
// fmt.eprintln("ERR classend: not enough pattern length")
|
||||
// return nil
|
||||
// }
|
||||
if step == len(ms.pattern) {
|
||||
err = .Malformed_Pattern
|
||||
return
|
||||
}
|
||||
|
||||
return p + 1, .OK
|
||||
utf8_advance(ms.pattern, &step) or_return
|
||||
}
|
||||
|
||||
case '[': {
|
||||
if ms.pattern[p] == '^' {
|
||||
p += 1
|
||||
// fine with step by 1
|
||||
if ms.pattern[step] == '^' {
|
||||
step += 1
|
||||
}
|
||||
|
||||
for ms.pattern[p] != ']' {
|
||||
// if p == len(ms.pattern) {
|
||||
// return 0, .Malformed_Pattern
|
||||
// }
|
||||
|
||||
ch := ms.pattern[p]
|
||||
p += 1
|
||||
|
||||
if p < len(ms.pattern) && ch == L_ESC {
|
||||
// skip escapes like '%'
|
||||
p += 1
|
||||
// run till end is reached
|
||||
for ms.pattern[step] != ']' {
|
||||
if step == len(ms.pattern) {
|
||||
err = .Malformed_Pattern
|
||||
return
|
||||
}
|
||||
|
||||
// if ms.pattern[p] == ']' {
|
||||
// break
|
||||
// }
|
||||
// dont care about utf8 here
|
||||
step += 1
|
||||
|
||||
if step < len(ms.pattern) && ms.pattern[step] == L_ESC {
|
||||
// skip escapes like '%'
|
||||
step += 1
|
||||
}
|
||||
}
|
||||
|
||||
return p + 1, .OK
|
||||
}
|
||||
|
||||
case: {
|
||||
return p, .OK
|
||||
// advance last time
|
||||
step += 1
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool {
|
||||
sig := true
|
||||
matchbracketclass :: proc(ms: ^MatchState, c: rune, p, ec: int) -> (sig: bool, err: Error) {
|
||||
sig = true
|
||||
p := p
|
||||
|
||||
if ms.pattern[p + 1] == '^' {
|
||||
@@ -180,98 +220,127 @@ matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool {
|
||||
sig = false
|
||||
}
|
||||
|
||||
p += 1
|
||||
|
||||
// while inside of class range
|
||||
for p < ec {
|
||||
ch := ms.pattern[p]
|
||||
char := utf8_advance(ms.pattern, &p) or_return
|
||||
|
||||
// e.g. %a
|
||||
if ms.pattern[p] == L_ESC {
|
||||
p += 1
|
||||
if char == L_ESC {
|
||||
next := utf8_advance(ms.pattern, &p) or_return
|
||||
|
||||
if match_class(c, ms.pattern[p]) {
|
||||
return sig
|
||||
}
|
||||
} else if p + 2 < len(ms.pattern) && ms.pattern[p + 1] == '-' {
|
||||
// e.g. [a-z] check
|
||||
if ms.pattern[p] <= c && c <= ms.pattern[p + 2] {
|
||||
return sig
|
||||
if match_class(c, next) {
|
||||
return
|
||||
}
|
||||
} else {
|
||||
next, next_size := utf8_peek(ms.pattern[p:]) or_return
|
||||
|
||||
p += 2
|
||||
} else if ms.pattern[p] == c {
|
||||
return sig
|
||||
// TODO test case for [a-???] where ??? is missing
|
||||
if next == '-' && p + next_size < len(ms.pattern) {
|
||||
// advance 2 codepoints
|
||||
p += next_size
|
||||
last := utf8_advance(ms.pattern, &p) or_return
|
||||
|
||||
if char <= c && c <= last {
|
||||
return
|
||||
}
|
||||
} else if char == c {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
p += 1
|
||||
}
|
||||
|
||||
return !sig
|
||||
sig = !sig
|
||||
return
|
||||
}
|
||||
|
||||
singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> bool {
|
||||
singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> (matched: bool, schar_size: int, err: Error) {
|
||||
if s >= len(ms.src) {
|
||||
return false
|
||||
return
|
||||
}
|
||||
|
||||
switch ms.pattern[p] {
|
||||
case '.': return true
|
||||
case L_ESC: return match_class(ms.src[s], ms.pattern[p + 1])
|
||||
case '[': return matchbracketclass(ms, ms.src[s], p, ep - 1)
|
||||
case: return ms.src[s] == ms.pattern[p]
|
||||
pchar, psize := utf8_peek(ms.pattern[p:]) or_return
|
||||
schar, ssize := utf8_peek(ms.src[s:]) or_return
|
||||
schar_size = ssize
|
||||
|
||||
switch pchar {
|
||||
case '.': matched = true
|
||||
case L_ESC: {
|
||||
pchar_next, _ := utf8_peek(ms.pattern[p + psize:]) or_return
|
||||
matched = match_class(schar, pchar_next)
|
||||
}
|
||||
case '[': {
|
||||
matched = matchbracketclass(ms, schar, p, ep - 1) or_return
|
||||
}
|
||||
case: {
|
||||
matched = schar == pchar
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) {
|
||||
matchbalance :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
if p >= len(ms.pattern) - 1 {
|
||||
return INVALID, .Invalid_Pattern_Capture
|
||||
}
|
||||
|
||||
schar, ssize := utf8_peek(ms.src[s:]) or_return
|
||||
pchar, psize := utf8_peek(ms.pattern[p:]) or_return
|
||||
|
||||
// skip until the src and pattern match
|
||||
if ms.src[s] != ms.pattern[p] {
|
||||
if schar != pchar {
|
||||
return INVALID, .OK
|
||||
}
|
||||
|
||||
s_begin := s
|
||||
cont := 1
|
||||
s := s + 1
|
||||
begin := ms.pattern[p]
|
||||
end := ms.pattern[p + 1]
|
||||
s := s + ssize
|
||||
begin := pchar
|
||||
end, _ := utf8_peek(ms.pattern[p + psize:]) or_return
|
||||
|
||||
for s < len(ms.src) {
|
||||
ch := ms.src[s]
|
||||
ch := utf8_advance(ms.src, &s) or_return
|
||||
|
||||
if ch == end {
|
||||
cont -= 1
|
||||
|
||||
if cont == 0 {
|
||||
return s + 1, .OK
|
||||
return s, .OK
|
||||
}
|
||||
} else if ch == begin {
|
||||
cont += 1
|
||||
}
|
||||
|
||||
s += 1
|
||||
}
|
||||
|
||||
return INVALID, .OK
|
||||
}
|
||||
|
||||
max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) {
|
||||
i := 0
|
||||
for singlematch(ms, s + i, p, ep) {
|
||||
i += 1
|
||||
m := s
|
||||
|
||||
// count up matches
|
||||
for {
|
||||
matched, size := singlematch(ms, m, p, ep) or_return
|
||||
|
||||
if !matched {
|
||||
break
|
||||
}
|
||||
|
||||
m += size
|
||||
}
|
||||
|
||||
for i >= 0 {
|
||||
result := match(ms, s + i, ep + 1) or_return
|
||||
for s <= m {
|
||||
result := match(ms, m, ep + 1) or_return
|
||||
|
||||
if result != INVALID {
|
||||
return result, .OK
|
||||
}
|
||||
|
||||
i -= 1
|
||||
if s == m {
|
||||
break
|
||||
}
|
||||
|
||||
m = utf8_prev(ms.src, s, m)
|
||||
}
|
||||
|
||||
return INVALID, .OK
|
||||
@@ -285,10 +354,15 @@ min_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) {
|
||||
|
||||
if result != INVALID {
|
||||
return result, .OK
|
||||
} else if singlematch(ms, s, p, ep) {
|
||||
s += 1
|
||||
} else {
|
||||
return INVALID, .OK
|
||||
// TODO receive next step maybe?
|
||||
matched, rune_size := singlematch(ms, s, p, ep) or_return
|
||||
|
||||
if matched {
|
||||
s += rune_size
|
||||
} else {
|
||||
return INVALID, .OK
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -339,7 +413,9 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
return s, .OK
|
||||
}
|
||||
|
||||
switch ms.pattern[p] {
|
||||
// NOTE we can walk by ascii steps if we know the characters are ascii
|
||||
char, _ := utf8_peek(ms.pattern[p:]) or_return
|
||||
switch char {
|
||||
case '(': {
|
||||
if ms.pattern[p + 1] == ')' {
|
||||
s = start_capture(ms, s, p + 2, CAP_POSITION) or_return
|
||||
@@ -389,13 +465,23 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
}
|
||||
|
||||
ep := classend(ms, p) or_return
|
||||
previous := s == 0 ? '\x00' : ms.src[s - 1]
|
||||
// allow last character to count too
|
||||
current := s >= len(ms.src) ? '\x00' : ms.src[s]
|
||||
previous, current: rune
|
||||
|
||||
// fmt.eprintln("TRY", rune(ms.src[s]), ep)
|
||||
if !matchbracketclass(ms, previous, p, ep - 1) &&
|
||||
matchbracketclass(ms, current, p, ep - 1) {
|
||||
// get previous
|
||||
if s != 0 {
|
||||
temp := utf8_prev(ms.src, 0, s)
|
||||
previous, _ = utf8_peek(ms.src[temp:]) or_return
|
||||
}
|
||||
|
||||
// get current
|
||||
if s != len(ms.src) {
|
||||
current, _ = utf8_peek(ms.src[s:]) or_return
|
||||
}
|
||||
|
||||
m1 := matchbracketclass(ms, previous, p, ep - 1) or_return
|
||||
m2 := matchbracketclass(ms, current, p, ep - 1) or_return
|
||||
|
||||
if !m1 && m2 {
|
||||
return match(ms, s, ep)
|
||||
}
|
||||
|
||||
@@ -428,8 +514,9 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
s := s
|
||||
ep := classend(ms, p) or_return
|
||||
single_matched, ssize := singlematch(ms, s, p, ep) or_return
|
||||
|
||||
if !singlematch(ms, s, p, ep) {
|
||||
if !single_matched {
|
||||
epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0
|
||||
|
||||
if epc == '*' || epc == '?' || epc == '-' {
|
||||
@@ -442,7 +529,7 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
|
||||
switch epc {
|
||||
case '?': {
|
||||
result := match(ms, s + 1, ep + 1) or_return
|
||||
result := match(ms, s + ssize, ep + 1) or_return
|
||||
|
||||
if result != INVALID {
|
||||
s = result
|
||||
@@ -452,7 +539,7 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
}
|
||||
|
||||
case '+': {
|
||||
s = max_expand(ms, s + 1, p, ep) or_return
|
||||
s = max_expand(ms, s + ssize, p, ep) or_return
|
||||
}
|
||||
|
||||
case '*': {
|
||||
@@ -464,7 +551,7 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
}
|
||||
|
||||
case: {
|
||||
return match(ms, s + 1, ep)
|
||||
return match(ms, s + ssize, ep)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -643,8 +730,8 @@ gmatch :: proc(
|
||||
ok = true
|
||||
first := length > 1 ? 1 : 0
|
||||
cap := captures[first]
|
||||
res = haystack[cap.start:cap.end]
|
||||
haystack^ = haystack[cap.end:]
|
||||
res = haystack[cap.byte_start:cap.byte_end]
|
||||
haystack^ = haystack[cap.byte_end:]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -677,13 +764,13 @@ gsub_builder :: proc(
|
||||
cap := captures[0]
|
||||
|
||||
// write front till capture
|
||||
strings.write_string(builder, haystack[:cap.start])
|
||||
strings.write_string(builder, haystack[:cap.byte_start])
|
||||
|
||||
// write replacements
|
||||
strings.write_string(builder, replace)
|
||||
|
||||
// advance string till end
|
||||
haystack = haystack[cap.end:]
|
||||
haystack = haystack[cap.byte_end:]
|
||||
}
|
||||
|
||||
strings.write_string(builder, haystack[:])
|
||||
@@ -722,11 +809,11 @@ gsub_with :: proc(
|
||||
|
||||
cap := captures[0]
|
||||
|
||||
word := haystack[cap.start:cap.end]
|
||||
word := haystack[cap.byte_start:cap.byte_end]
|
||||
call(data, word)
|
||||
|
||||
// advance string till end
|
||||
haystack = haystack[cap.end:]
|
||||
haystack = haystack[cap.byte_end:]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -744,8 +831,8 @@ gfind :: proc(
|
||||
if length != 0 && err == .OK {
|
||||
ok = true
|
||||
cap := captures[0]
|
||||
res = haystack[cap.start:cap.end]
|
||||
haystack^ = haystack[cap.end:]
|
||||
res = haystack[cap.byte_start:cap.byte_end]
|
||||
haystack^ = haystack[cap.byte_end:]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,21 +4,28 @@ import lua "core:text/lua"
|
||||
import "core:testing"
|
||||
import "core:fmt"
|
||||
import "core:os"
|
||||
import "core:io"
|
||||
|
||||
TEST_count: int
|
||||
TEST_fail: int
|
||||
|
||||
when ODIN_TEST {
|
||||
expect :: testing.expect
|
||||
} else {
|
||||
expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) {
|
||||
TEST_count += 1
|
||||
if !condition {
|
||||
TEST_fail += 1
|
||||
fmt.printf("%v %v\n", loc, message)
|
||||
return
|
||||
}
|
||||
// Inline replacement for expect: counts one check and returns true when the
// check FAILED, so call sites can write `if failed(t, cond) { logf(...) }`.
// On failure the caller location is written to `t.w` and both `t.error_count`
// and `TEST_fail` are incremented.
failed :: proc(t: ^testing.T, ok: bool, loc := #caller_location) -> bool {
	TEST_count += 1

	if ok {
		return false
	}

	// prefix for the detail lines the caller logs next
	fmt.wprintf(t.w, "%v: ", loc)
	TEST_fail += 1
	t.error_count += 1

	return true
}
|
||||
|
||||
expect :: testing.expect
|
||||
|
||||
// Formats `args` according to `format` and writes the result to the test
// writer `t.w`.
logf :: proc(t: ^testing.T, format: string, args: ..any) {
	out := t.w
	fmt.wprintf(out, format, ..args)
}
|
||||
|
||||
// find correct byte offsets
|
||||
@@ -55,20 +62,20 @@ test_find :: proc(t: ^testing.T) {
|
||||
}
|
||||
|
||||
captures: [lua.MAXCAPTURES]lua.Match
|
||||
for entry in ENTRIES {
|
||||
for entry, i in ENTRIES {
|
||||
captures[0] = {}
|
||||
length, err := lua.find_aux(entry.s, entry.p, entry.offset, true, &captures)
|
||||
cap := captures[0]
|
||||
ok := length > 0 && err == .OK
|
||||
success := entry.match.ok == ok && entry.match.start == cap.start && entry.match.end == cap.end
|
||||
success := entry.match.ok == ok && entry.match.start == cap.byte_start && entry.match.end == cap.byte_end
|
||||
|
||||
if !success {
|
||||
fmt.eprintf("Find failed for: haystack = %s\tpattern = %s\n", entry.s, entry.p)
|
||||
if failed(t, success) {
|
||||
logf(t, "Find %d failed!\n", i)
|
||||
logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p)
|
||||
logf(t, "\tSTART: %d == %d?\n", entry.match.start, cap.byte_start)
|
||||
logf(t, "\tEND: %d == %d?\n", entry.match.end, cap.byte_end)
|
||||
logf(t, "\tErr: %v\tLength %d\n", err, length)
|
||||
}
|
||||
|
||||
expect(t, entry.match.start == cap.start, "match start didnt match")
|
||||
expect(t, entry.match.end == cap.end, "match end didnt match",)
|
||||
expect(t, entry.match.ok == ok, "find result didnt match")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -179,16 +186,15 @@ test_match :: proc(t: ^testing.T) {
|
||||
captures[0] = {}
|
||||
length, err := lua.find_aux(entry.s, entry.p, 0, false, &captures)
|
||||
ok := length > 0 && err == .OK
|
||||
result := entry.s[captures[0].start:captures[0].end]
|
||||
result := entry.s[captures[0].byte_start:captures[0].byte_end]
|
||||
success := entry.ok == ok && result == entry.result
|
||||
|
||||
if !success {
|
||||
fmt.eprintf("Match failed for: haystack = %s\tpattern = %s\n", entry.s, entry.p)
|
||||
fmt.eprintf("Match invalid result! |WANTED:| %s |GOT:| %s\n", entry.result, result)
|
||||
if failed(t, success) {
|
||||
logf(t, "Match %d failed!\n", i)
|
||||
logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p)
|
||||
logf(t, "\tResults: WANTED %s\tGOT %s\n", entry.result, result)
|
||||
logf(t, "\tErr: %v\tLength %d\n", err, length)
|
||||
}
|
||||
|
||||
expect(t, entry.ok == ok, "find result didnt match")
|
||||
expect(t, result == entry.result, "entry result didnt match")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -202,12 +208,17 @@ test_captures :: proc(t: ^testing.T) {
|
||||
// match all captures
|
||||
compare_captures :: proc(t: ^testing.T, test: ^Temp, haystack: string, comp: []string, loc := #caller_location) {
|
||||
length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures)
|
||||
expect(t, len(comp) == length, "didnt match input comparison strings", loc)
|
||||
if failed(t, len(comp) == length) {
|
||||
logf(t, "Captures Compare Failed -> Lengths %d != %d\n", len(comp), length)
|
||||
}
|
||||
|
||||
for i in 0..<length {
|
||||
cap := test.captures[i]
|
||||
text := haystack[cap.start:cap.end]
|
||||
expect(t, comp[i] == text, "didnt match comparison string", loc)
|
||||
text := haystack[cap.byte_start:cap.byte_end]
|
||||
|
||||
if failed(t, comp[i] == text) {
|
||||
logf(t, "Capture don't equal -> %s != %s\n", comp[i], text)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -215,7 +226,12 @@ test_captures :: proc(t: ^testing.T) {
|
||||
matches :: proc(t: ^testing.T, test: ^Temp, haystack: string, ok: bool, loc := #caller_location) {
|
||||
length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures)
|
||||
result := length > 0 && err == .OK
|
||||
expect(t, result == ok, "result didnt eq", loc)
|
||||
|
||||
if failed(t, result == ok) {
|
||||
logf(t, "Capture match failed!\n")
|
||||
logf(t, "\tErr: %v\n", err)
|
||||
logf(t, "\tLength: %v\n", length)
|
||||
}
|
||||
}
|
||||
|
||||
temp := Temp { pattern = "(one).+" }
|
||||
@@ -238,8 +254,8 @@ test_captures :: proc(t: ^testing.T) {
|
||||
lua.find_aux(haystack, pattern, 0, false, &captures)
|
||||
cap1 := captures[1]
|
||||
cap2 := captures[2]
|
||||
text1 := haystack[cap1.start:cap1.end]
|
||||
text2 := haystack[cap2.start:cap2.end]
|
||||
text1 := haystack[cap1.byte_start:cap1.byte_end]
|
||||
text2 := haystack[cap2.byte_start:cap2.byte_end]
|
||||
expect(t, text1 == "233", "Multi-Capture failed at 1")
|
||||
expect(t, text2 == "hello", "Multi-Capture failed at 2")
|
||||
}
|
||||
@@ -247,6 +263,13 @@ test_captures :: proc(t: ^testing.T) {
|
||||
|
||||
@test
|
||||
test_gmatch :: proc(t: ^testing.T) {
|
||||
// Checks that the expected entry `a[index]` equals the produced match `b`;
// on a mismatch the index and both strings are logged.
gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) {
	expected := a[index]

	if !failed(t, expected == b) {
		return
	}

	logf(t, "GMATCH %d failed!\n", index)
	logf(t, "\t%s != %s\n", expected, b)
}
|
||||
|
||||
{
|
||||
haystack := "testing this out 123"
|
||||
pattern := "%w+"
|
||||
@@ -256,7 +279,7 @@ test_gmatch :: proc(t: ^testing.T) {
|
||||
index: int
|
||||
|
||||
for match in lua.gmatch(s, pattern, &captures) {
|
||||
expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match))
|
||||
gmatch_check(t, index, output[:], match)
|
||||
index += 1
|
||||
}
|
||||
}
|
||||
@@ -270,7 +293,7 @@ test_gmatch :: proc(t: ^testing.T) {
|
||||
index: int
|
||||
|
||||
for match in lua.gmatch(s, pattern, &captures) {
|
||||
expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match))
|
||||
gmatch_check(t, index, output[:], match)
|
||||
index += 1
|
||||
}
|
||||
}
|
||||
@@ -284,7 +307,7 @@ test_gmatch :: proc(t: ^testing.T) {
|
||||
index: int
|
||||
|
||||
for match in lua.gmatch(s, pattern, &captures) {
|
||||
expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match))
|
||||
gmatch_check(t, index, output[:], match)
|
||||
index += 1
|
||||
}
|
||||
}
|
||||
@@ -308,11 +331,15 @@ test_gfind :: proc(t: ^testing.T) {
|
||||
index: int
|
||||
|
||||
for word in lua.gfind(s, pattern, &captures) {
|
||||
expect(t, output[index] == word, fmt.tprintf("GFIND %d failed: %s != %s\n", index, output[index], word))
|
||||
if failed(t, output[index] == word) {
|
||||
logf(t, "GFIND %d failed!\n", index)
|
||||
logf(t, "\t%s != %s\n", output[index], word)
|
||||
}
|
||||
index += 1
|
||||
}
|
||||
}
|
||||
|
||||
@test
|
||||
test_frontier :: proc(t: ^testing.T) {
|
||||
Temp :: struct {
|
||||
t: ^testing.T,
|
||||
@@ -322,11 +349,12 @@ test_frontier :: proc(t: ^testing.T) {
|
||||
|
||||
call :: proc(data: rawptr, word: string) {
|
||||
temp := cast(^Temp) data
|
||||
expect(
|
||||
temp.t,
|
||||
word == temp.output[temp.index],
|
||||
fmt.tprintf("frontier temp didnt match: %s != %s\n", word, temp.output[temp.index]),
|
||||
)
|
||||
|
||||
if failed(temp.t, word == temp.output[temp.index]) {
|
||||
logf(temp.t, "GSUB_WITH %d failed!\n", temp.index)
|
||||
logf(temp.t, "\t%s != %s\n", temp.output[temp.index], word)
|
||||
}
|
||||
|
||||
temp.index += 1
|
||||
}
|
||||
|
||||
@@ -343,8 +371,38 @@ test_frontier :: proc(t: ^testing.T) {
|
||||
lua.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call)
|
||||
}
|
||||
|
||||
// Exploratory test for walking a multi-byte UTF-8 haystack with gmatch.
// Every match is only printed to stderr; nothing is asserted.
@test
test_utf8 :: proc(t: ^testing.T) {
	// Disabled variant that searches for a literal multi-byte rune instead:
	//     for word in lua.gmatch(&text, "恥", &matches) {
	//         fmt.eprintln(word)
	//     }

	matches: [lua.MAXCAPTURES]lua.Match
	text := "恥ずべき恥フク恥ロ"
	remaining := &text

	// NOTE(review): "w+" has no '%' escape, so it appears to look for runs of
	// the literal letter 'w'; the other tests use "%w+" — confirm which one is
	// intended here.
	for found in lua.gmatch(remaining, "w+", &matches) {
		fmt.eprintln(found)
	}

	// Unfinished idea kept from the original draft: call lua.find_aux directly
	// with a captures array and inspect the returned length and error.
}
|
||||
|
||||
main :: proc() {
|
||||
t: testing.T
|
||||
stream := os.stream_from_handle(os.stdout)
|
||||
w := io.to_writer(stream)
|
||||
t.w = w
|
||||
|
||||
test_find(&t)
|
||||
test_match(&t)
|
||||
test_captures(&t)
|
||||
@@ -353,7 +411,9 @@ main :: proc() {
|
||||
test_gfind(&t)
|
||||
test_frontier(&t)
|
||||
|
||||
fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
|
||||
// test_utf8(&t)
|
||||
|
||||
fmt.wprintf(w, "%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
|
||||
if TEST_fail > 0 {
|
||||
os.exit(1)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user