mirror of
https://github.com/odin-lang/Odin.git
synced 2026-01-10 15:03:22 +00:00
balanced string, frontier pattern, gsub_with and their tests added
This commit is contained in:
@@ -19,6 +19,7 @@ Error :: enum {
|
||||
Invalid_Capture_Index,
|
||||
Invalid_Pattern_Capture,
|
||||
Unfinished_Capture,
|
||||
Malformed_Pattern,
|
||||
}
|
||||
|
||||
L_ESC :: '%'
|
||||
@@ -143,20 +144,22 @@ classend :: proc(ms: ^MatchState, p: int) -> (int, Error) {
|
||||
p += 1
|
||||
}
|
||||
|
||||
// TODO double check
|
||||
for {
|
||||
ch := ms.pattern[p]
|
||||
for ms.pattern[p] != ']' {
|
||||
// if p == len(ms.pattern) {
|
||||
// return 0, .Malformed_Pattern
|
||||
// }
|
||||
|
||||
if ch == L_ESC && p <= len(ms.pattern) {
|
||||
ch := ms.pattern[p]
|
||||
p += 1
|
||||
|
||||
if p < len(ms.pattern) && ch == L_ESC {
|
||||
// skip escapes like '%'
|
||||
p += 1
|
||||
}
|
||||
|
||||
if ms.pattern[p] == ']' {
|
||||
break
|
||||
}
|
||||
|
||||
p += 1
|
||||
// if ms.pattern[p] == ']' {
|
||||
// break
|
||||
// }
|
||||
}
|
||||
|
||||
return p + 1, .OK
|
||||
@@ -183,13 +186,14 @@ matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool {
|
||||
for p < ec {
|
||||
ch := ms.pattern[p]
|
||||
|
||||
if ms.pattern[p] == L_ESC {
|
||||
// e.g. %a
|
||||
if ms.pattern[p] == L_ESC {
|
||||
p += 1
|
||||
|
||||
if match_class(c, ms.pattern[p]) {
|
||||
return sig
|
||||
}
|
||||
} else if ms.pattern[p + 1] == '-' && p + 2 < len(ms.pattern) {
|
||||
} else if p + 2 < len(ms.pattern) && ms.pattern[p + 1] == '-' {
|
||||
// e.g. [a-z] check
|
||||
if ms.pattern[p] <= c && c <= ms.pattern[p + 2] {
|
||||
return sig
|
||||
@@ -219,39 +223,40 @@ singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
// matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) {
|
||||
// s_begin := s
|
||||
// s := s + 1
|
||||
// cont := 0
|
||||
// Matches a balanced span for the `%bxy` pattern item.
//
// Inputs:
// - ms: match state holding the source (`ms.src`) and the pattern (`ms.pattern`)
// - s:  index into `ms.src` at which the balanced span has to start
// - p:  index into `ms.pattern` of the opening delimiter; the closing
//       delimiter is read from `ms.pattern[p + 1]`
//
// Returns:
// - the index one past the closing delimiter and `.OK` on success
// - `INVALID` and `.OK` when no balanced span starts at `s`
// - `INVALID` and `.Invalid_Pattern_Capture` when the pattern ends before
//   both delimiters are given
matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) {
	// both delimiter characters have to be present in the pattern
	if p >= len(ms.pattern) - 1 {
		return INVALID, .Invalid_Pattern_Capture
	}

	// `s` may sit at the very end of the source (e.g. when an earlier pattern
	// item consumed everything); indexing there would be out of bounds, and
	// there is nothing left that could open a balanced span
	if s >= len(ms.src) || ms.src[s] != ms.pattern[p] {
		return INVALID, .OK
	}

	begin := ms.pattern[p]
	end := ms.pattern[p + 1]

	// nesting depth, the opening delimiter at `s` already counts as one
	cont := 1

	for i := s + 1; i < len(ms.src); i += 1 {
		ch := ms.src[i]

		// `end` is checked before `begin` so that identical delimiters
		// (e.g. `%b__`) close the span instead of nesting deeper
		if ch == end {
			cont -= 1

			if cont == 0 {
				return i + 1, .OK
			}
		} else if ch == begin {
			cont += 1
		}
	}

	// the source ended before the delimiters balanced out
	return INVALID, .OK
}
|
||||
|
||||
max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) {
|
||||
i := 0
|
||||
@@ -263,7 +268,6 @@ max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) {
|
||||
result := match(ms, s + i, ep + 1) or_return
|
||||
|
||||
if result != INVALID {
|
||||
// print("SET", result)
|
||||
return result, .OK
|
||||
}
|
||||
|
||||
@@ -368,35 +372,34 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
switch ms.pattern[p + 1] {
|
||||
// balanced string
|
||||
case 'b': {
|
||||
// res := matchbalance(ms, s, p + 2)
|
||||
|
||||
// if data, ok := res.?; ok {
|
||||
// // s = data
|
||||
// // eg after %b()
|
||||
// // print("SUCCESS")
|
||||
// return patt_match(ms, s, p + 4)
|
||||
// }
|
||||
s = matchbalance(ms, s, p + 2) or_return
|
||||
|
||||
if s != INVALID {
|
||||
// eg after %b()
|
||||
return match(ms, s, p + 4)
|
||||
}
|
||||
}
|
||||
|
||||
// frontier
|
||||
case 'f': {
|
||||
// p += 2
|
||||
p += 2
|
||||
|
||||
// if ms.pattern[p] != '[' {
|
||||
// print("missing '[' after %f in pattern")
|
||||
// return nil
|
||||
// }
|
||||
if ms.pattern[p] != '[' {
|
||||
return INVALID, .Invalid_Pattern_Capture
|
||||
}
|
||||
|
||||
// ep := classend(ms, p).?
|
||||
// previous := 0 if s == 0 else s - 1
|
||||
ep := classend(ms, p) or_return
|
||||
previous := s == 0 ? '\x00' : ms.src[s - 1]
|
||||
// allow last character to count too
|
||||
current := s >= len(ms.src) ? '\x00' : ms.src[s]
|
||||
|
||||
// if !matchbracketclass(ms, ms.src[previous], p, ep - 1) &&
|
||||
// matchbracketclass(ms, ms.src[s], p, ep) {
|
||||
// return patt_match(ms, s, ep)
|
||||
// }
|
||||
// fmt.eprintln("TRY", rune(ms.src[s]), ep)
|
||||
if !matchbracketclass(ms, previous, p, ep - 1) &&
|
||||
matchbracketclass(ms, current, p, ep - 1) {
|
||||
return match(ms, s, ep)
|
||||
}
|
||||
|
||||
// return nil
|
||||
s = INVALID
|
||||
}
|
||||
|
||||
// capture group
|
||||
@@ -416,7 +419,6 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
|
||||
case: {
|
||||
return match_default(ms, s, p)
|
||||
// print("PATT DEF", rune(ms.src[s]), rune(ms.pattern[p]))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -426,11 +428,9 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
s := s
|
||||
ep := classend(ms, p) or_return
|
||||
// ch := s < len(ms.src) ? rune(ms.src[s]) : 0
|
||||
|
||||
if !singlematch(ms, s, p, ep) {
|
||||
epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0
|
||||
// print("+++", rune(epc))
|
||||
|
||||
if epc == '*' || epc == '?' || epc == '-' {
|
||||
return match(ms, s, ep + 1)
|
||||
@@ -439,7 +439,6 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) {
|
||||
}
|
||||
} else {
|
||||
epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0
|
||||
// print("~~~", ch, rune(epc))
|
||||
|
||||
switch epc {
|
||||
case '?': {
|
||||
@@ -652,7 +651,7 @@ gmatch :: proc(
|
||||
return
|
||||
}
|
||||
|
||||
// gsub with builder
|
||||
// gsub with builder, replace patterns found with the replace content
|
||||
gsub_builder :: proc(
|
||||
builder: ^strings.Builder,
|
||||
haystack: string,
|
||||
@@ -702,9 +701,38 @@ gsub_allocator :: proc(
|
||||
return gsub_builder(&builder, haystack, pattern, replace)
|
||||
}
|
||||
|
||||
// Calls `call` for every match of `pattern` found in `haystack`.
//
// Inputs:
// - haystack: text that is searched
// - pattern:  pattern handed to `find_aux` for every search step
// - data:     user pointer passed through to `call` unchanged
// - call:     procedure invoked with `data` and the text of the zeroth capture
//
// The search restarts on the remainder of the haystack behind each match and
// stops as soon as `find_aux` reports an error, reports 0 as its first result,
// or the match does not consume any input.
gsub_with :: proc(
	haystack: string,
	pattern: string,
	data: rawptr,
	call: proc(data: rawptr, word: string),
) {
	captures: [MAXCAPTURES]Match
	rest := haystack

	for {
		length, err := find_aux(rest, pattern, 0, false, &captures)

		// done
		if length == 0 || err != .OK {
			break
		}

		whole := captures[0]
		call(data, rest[whole.start:whole.end])

		// a match ending at index 0 would leave `rest` untouched and repeat
		// the exact same search forever, stop instead of spinning
		if whole.end <= 0 {
			break
		}

		// continue behind the match
		rest = rest[whole.end:]
	}
}
|
||||
|
||||
gsub :: proc { gsub_builder, gsub_allocator }
|
||||
|
||||
// iterative find with first capture only
|
||||
// iterative find with zeroth capture only
|
||||
gfind :: proc(
|
||||
haystack: ^string,
|
||||
pattern: string,
|
||||
|
||||
@@ -15,7 +15,7 @@ when ODIN_TEST {
|
||||
TEST_count += 1
|
||||
if !condition {
|
||||
TEST_fail += 1
|
||||
fmt.printf("[%v] %v\n", loc, message)
|
||||
fmt.printf("%v %v\n", loc, message)
|
||||
return
|
||||
}
|
||||
}
|
||||
@@ -166,6 +166,12 @@ test_match :: proc(t: ^testing.T) {
|
||||
{ " testing this", "^testing", "", false },
|
||||
{ "testing this", "^%w+", "testing", true },
|
||||
{ " testing this", "^%w+", "", false },
|
||||
|
||||
// balanced string %b
|
||||
{ "testing (this) out", "%b()", "(this)", true },
|
||||
{ "testing athisz out", "%baz", "athisz", true },
|
||||
{ "testing _this_ out", "%b__", "_this_", true },
|
||||
{ "testing _this_ out", "%b_", "", false },
|
||||
}
|
||||
|
||||
captures: [lua.MAXCAPTURES]lua.Match
|
||||
@@ -294,21 +300,49 @@ test_gsub :: proc(t: ^testing.T) {
|
||||
|
||||
// Checks that `lua.gfind` yields every alphanumeric word of the haystack in
// order, and that it yields exactly as many words as expected.
@test
test_gfind :: proc(t: ^testing.T) {
	haystack := "test1 123 test2 123 test3"
	pattern := "%w+"
	captures: [lua.MAXCAPTURES]lua.Match
	s := &haystack
	output := [?]string { "test1", "123", "test2", "123", "test3" }
	index: int

	for word in lua.gfind(s, pattern, &captures) {
		// report surplus matches as a failure instead of indexing past `output`
		if index >= len(output) {
			expect(t, false, fmt.tprintf("GFIND %d failed: unexpected extra match %s\n", index, word))
			break
		}

		expect(t, output[index] == word, fmt.tprintf("GFIND %d failed: %s != %s\n", index, output[index], word))
		index += 1
	}

	// too few matches would otherwise pass silently
	expect(t, index == len(output), fmt.tprintf("GFIND failed: expected %d matches, got %d\n", len(output), index))
}
|
||||
|
||||
// Checks the frontier pattern `%f` through `lua.gsub_with`: only the fully
// upper-case words of the sample sentence may be reported, in order.
@test
test_frontier :: proc(t: ^testing.T) {
	// state shared with the callback through the `data` pointer
	Temp :: struct {
		t: ^testing.T,
		index: int,
		output: [3]string,
	}

	call :: proc(data: rawptr, word: string) {
		temp := cast(^Temp) data

		// report surplus matches as a failure instead of indexing past `output`
		if temp.index >= len(temp.output) {
			expect(
				temp.t,
				false,
				fmt.tprintf("frontier matched unexpected extra word: %s\n", word),
			)
			temp.index += 1
			return
		}

		expect(
			temp.t,
			word == temp.output[temp.index],
			fmt.tprintf("frontier temp didnt match: %s != %s\n", word, temp.output[temp.index]),
		)
		temp.index += 1
	}

	temp := Temp {
		t = t,
		output = {
			"THE",
			"QUICK",
			"JUMPS",
		},
	}

	// https://lua-users.org/wiki/FrontierPattern example taken from here
	lua.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call)

	// the callback must have run once per expected word, no more and no less
	expect(
		t,
		temp.index == len(temp.output),
		fmt.tprintf("frontier expected %d matches, got %d\n", len(temp.output), temp.index),
	)
}
|
||||
|
||||
main :: proc() {
|
||||
t: testing.T
|
||||
test_find(&t)
|
||||
@@ -317,6 +351,7 @@ main :: proc() {
|
||||
test_gmatch(&t)
|
||||
test_gsub(&t)
|
||||
test_gfind(&t)
|
||||
test_frontier(&t)
|
||||
|
||||
fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
|
||||
if TEST_fail > 0 {
|
||||
|
||||
Reference in New Issue
Block a user