diff --git a/core/text/regex/regex.odin b/core/text/regex/regex.odin index c805740f7..90aa34946 100644 --- a/core/text/regex/regex.odin +++ b/core/text/regex/regex.odin @@ -77,6 +77,8 @@ Match_Iterator :: struct { vm: virtual_machine.Machine, idx: int, temp: runtime.Allocator, + threads: int, + done: bool, } /* @@ -101,7 +103,6 @@ create :: proc( permanent_allocator := context.allocator, temporary_allocator := context.temp_allocator, ) -> (result: Regular_Expression, err: Error) { - // For the sake of speed and simplicity, we first run all the intermediate // processes such as parsing and compilation through the temporary // allocator. @@ -294,6 +295,7 @@ create_iterator :: proc( result.temp = temporary_allocator result.vm = virtual_machine.create(result.regex.program, str) result.vm.class_data = result.regex.class_data + result.threads = max(1, virtual_machine.opcode_count(result.vm.code) - 1) return } @@ -457,8 +459,27 @@ match_iterator :: proc(it: ^Match_Iterator) -> (result: Capture, index: int, ok: assert(len(it.capture.pos) >= common.MAX_CAPTURE_GROUPS, "Pre-allocated RegEx capture `pos` must be at least 10 elements long.") + // Guard against situations in which the iterator should finish. + if it.done { + return + } + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + if it.idx > 0 { + // Reset the state needed to `virtual_machine.run` again. + it.vm.top_thread = 0 + it.vm.current_rune = rune(0) + it.vm.current_rune_size = 0 + for i in 0.. (result: Capture, index: int, ok: } } + if !ok { + // Match failed, bail out. + return + } + + if it.vm.string_pointer == sp_before { + // The string pointer did not move, but there was a match. + // + // At this point, the pattern supplied to the iterator will infinitely + // loop if we do not intervene. + it.done = true + } + if it.vm.string_pointer == len(it.vm.memory) { + // The VM hit the end of the string. + // + // We do not check at the start, because a match of pattern `$` + // against string "" is valid and must return a match. + // + // This check prevents a double-match of `$` against a non-empty string. + it.done = true + } + str := string(it.vm.memory) num_groups: int @@ -488,9 +531,7 @@ match_iterator :: proc(it: ^Match_Iterator) -> (result: Capture, index: int, ok: num_groups = n } - defer if ok { - it.idx += 1 - } + defer it.idx += 1 if num_groups > 0 { result = {it.capture.pos[:num_groups], it.capture.groups[:num_groups]} @@ -504,8 +545,24 @@ match :: proc { match_iterator, } +/* +Reset an iterator, allowing it to be run again as if new. + +Inputs: +- it: The iterator to reset. +*/ reset :: proc(it: ^Match_Iterator) { - it.idx = 0 + it.done = false + it.idx = 0 + it.vm.string_pointer = 0 + + it.vm.top_thread = 0 + it.vm.current_rune = rune(0) + it.vm.current_rune_size = 0 + for i in 0.. (saved: ^[2 * common.MAX_CAPTURE_GROUPS]int, ok: bool) #no_bounds_check { when UNICODE_MODE { - vm.next_rune, vm.next_rune_size = utf8.decode_rune_in_string(vm.memory) + vm.next_rune, vm.next_rune_size = utf8.decode_rune_in_string(vm.memory[vm.string_pointer:]) } else { if len(vm.memory) > 0 { - vm.next_rune = cast(rune)vm.memory[0] + vm.next_rune = cast(rune)vm.memory[vm.string_pointer] vm.next_rune_size = 1 } } @@ -652,4 +652,4 @@ destroy :: proc(vm: Machine, allocator := context.allocator) { delete(vm.busy_map) free(vm.threads) free(vm.next_threads) -} \ No newline at end of file +} diff --git a/tests/core/text/regex/test_core_text_regex.odin b/tests/core/text/regex/test_core_text_regex.odin index 913e716e5..8b4e3f997 100644 --- a/tests/core/text/regex/test_core_text_regex.odin +++ b/tests/core/text/regex/test_core_text_regex.odin @@ -1119,7 +1119,7 @@ iterator_vectors := []Iterator_Test{ @test test_match_iterator :: proc(t: ^testing.T) { - for test in iterator_vectors { + vector: for test in iterator_vectors { it, err := regex.create_iterator(test.haystack, test.pattern, test.flags) defer regex.destroy(it) @@ -1128,7 +1128,8 @@ test_match_iterator :: proc(t: ^testing.T) { for capture, idx in regex.match(&it) { if idx >= len(test.expected) { - break + log.errorf("got more than expected number of captures for matching string %q against pattern %q\n\tidx %i = %v", test.haystack, test.pattern, idx, capture) + continue vector } check_capture(t, capture, test.expected[idx]) }