mirror of
https://github.com/odin-lang/Odin.git
synced 2026-02-14 07:13:14 +00:00
Make RegEx VM restartable and fix iterator infinite loop
This commit is contained in:
@@ -77,6 +77,8 @@ Match_Iterator :: struct {
|
||||
vm: virtual_machine.Machine,
|
||||
idx: int,
|
||||
temp: runtime.Allocator,
|
||||
threads: int,
|
||||
done: bool,
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -101,7 +103,6 @@ create :: proc(
|
||||
permanent_allocator := context.allocator,
|
||||
temporary_allocator := context.temp_allocator,
|
||||
) -> (result: Regular_Expression, err: Error) {
|
||||
|
||||
// For the sake of speed and simplicity, we first run all the intermediate
|
||||
// processes such as parsing and compilation through the temporary
|
||||
// allocator.
|
||||
@@ -294,6 +295,7 @@ create_iterator :: proc(
|
||||
result.temp = temporary_allocator
|
||||
result.vm = virtual_machine.create(result.regex.program, str)
|
||||
result.vm.class_data = result.regex.class_data
|
||||
result.threads = max(1, virtual_machine.opcode_count(result.vm.code) - 1)
|
||||
|
||||
return
|
||||
}
|
||||
@@ -457,8 +459,27 @@ match_iterator :: proc(it: ^Match_Iterator) -> (result: Capture, index: int, ok:
|
||||
assert(len(it.capture.pos) >= common.MAX_CAPTURE_GROUPS,
|
||||
"Pre-allocated RegEx capture `pos` must be at least 10 elements long.")
|
||||
|
||||
// Guard against situations in which the iterator should finish.
|
||||
if it.done {
|
||||
return
|
||||
}
|
||||
|
||||
runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
|
||||
|
||||
if it.idx > 0 {
|
||||
// Reset the state needed to `virtual_machine.run` again.
|
||||
it.vm.top_thread = 0
|
||||
it.vm.current_rune = rune(0)
|
||||
it.vm.current_rune_size = 0
|
||||
for i in 0..<it.threads {
|
||||
it.vm.threads[i] = {}
|
||||
it.vm.next_threads[i] = {}
|
||||
}
|
||||
}
|
||||
|
||||
// Take note of where the string pointer is before we start.
|
||||
sp_before := it.vm.string_pointer
|
||||
|
||||
saved: ^[2 * common.MAX_CAPTURE_GROUPS]int
|
||||
{
|
||||
context.allocator = it.temp
|
||||
@@ -469,6 +490,28 @@ match_iterator :: proc(it: ^Match_Iterator) -> (result: Capture, index: int, ok:
|
||||
}
|
||||
}
|
||||
|
||||
if !ok {
|
||||
// Match failed, bail out.
|
||||
return
|
||||
}
|
||||
|
||||
if it.vm.string_pointer == sp_before {
|
||||
// The string pointer did not move, but there was a match.
|
||||
//
|
||||
// At this point, the pattern supplied to the iterator will infinitely
|
||||
// loop if we do not intervene.
|
||||
it.done = true
|
||||
}
|
||||
if it.vm.string_pointer == len(it.vm.memory) {
|
||||
// The VM hit the end of the string.
|
||||
//
|
||||
// We do not check at the start, because a match of pattern `$`
|
||||
// against string "" is valid and must return a match.
|
||||
//
|
||||
// This check prevents a double-match of `$` against a non-empty string.
|
||||
it.done = true
|
||||
}
|
||||
|
||||
str := string(it.vm.memory)
|
||||
num_groups: int
|
||||
|
||||
@@ -488,9 +531,7 @@ match_iterator :: proc(it: ^Match_Iterator) -> (result: Capture, index: int, ok:
|
||||
num_groups = n
|
||||
}
|
||||
|
||||
defer if ok {
|
||||
it.idx += 1
|
||||
}
|
||||
defer it.idx += 1
|
||||
|
||||
if num_groups > 0 {
|
||||
result = {it.capture.pos[:num_groups], it.capture.groups[:num_groups]}
|
||||
@@ -504,8 +545,24 @@ match :: proc {
|
||||
match_iterator,
|
||||
}
|
||||
|
||||
/*
|
||||
Reset an iterator, allowing it to be run again as if new.
|
||||
|
||||
Inputs:
|
||||
- it: The iterator to reset.
|
||||
*/
|
||||
reset :: proc(it: ^Match_Iterator) {
	// Rewind the iterator's own bookkeeping: clear the "finished" flag and
	// the count of matches handed out so far.
	it.done = false
	it.idx = 0

	// Start scanning from the beginning of the input again.
	it.vm.string_pointer = 0

	// Clear the per-run VM state. These are the same fields that
	// `match_iterator` resets before calling into the VM again.
	it.vm.top_thread = 0
	it.vm.current_rune = rune(0)
	it.vm.current_rune_size = 0
	for i in 0..<it.threads {
		it.vm.threads[i] = {}
		it.vm.next_threads[i] = {}
	}
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -329,10 +329,10 @@ add_thread :: proc(vm: ^Machine, saved: ^[2 * common.MAX_CAPTURE_GROUPS]int, pc:
|
||||
|
||||
run :: proc(vm: ^Machine, $UNICODE_MODE: bool) -> (saved: ^[2 * common.MAX_CAPTURE_GROUPS]int, ok: bool) #no_bounds_check {
|
||||
when UNICODE_MODE {
|
||||
vm.next_rune, vm.next_rune_size = utf8.decode_rune_in_string(vm.memory)
|
||||
vm.next_rune, vm.next_rune_size = utf8.decode_rune_in_string(vm.memory[vm.string_pointer:])
|
||||
} else {
|
||||
if len(vm.memory) > 0 {
|
||||
vm.next_rune = cast(rune)vm.memory[0]
|
||||
vm.next_rune = cast(rune)vm.memory[vm.string_pointer]
|
||||
vm.next_rune_size = 1
|
||||
}
|
||||
}
|
||||
@@ -652,4 +652,4 @@ destroy :: proc(vm: Machine, allocator := context.allocator) {
|
||||
delete(vm.busy_map)
|
||||
free(vm.threads)
|
||||
free(vm.next_threads)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1119,7 +1119,7 @@ iterator_vectors := []Iterator_Test{
|
||||
|
||||
@test
|
||||
test_match_iterator :: proc(t: ^testing.T) {
|
||||
for test in iterator_vectors {
|
||||
vector: for test in iterator_vectors {
|
||||
it, err := regex.create_iterator(test.haystack, test.pattern, test.flags)
|
||||
defer regex.destroy(it)
|
||||
|
||||
@@ -1128,7 +1128,8 @@ test_match_iterator :: proc(t: ^testing.T) {
|
||||
|
||||
for capture, idx in regex.match(&it) {
|
||||
if idx >= len(test.expected) {
|
||||
break
|
||||
log.errorf("got more than expected number of captures for matching string %q against pattern %q\n\tidx %i = %v", test.haystack, test.pattern, idx, capture)
|
||||
continue vector
|
||||
}
|
||||
check_capture(t, capture, test.expected[idx])
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user