mirror of
https://github.com/odin-lang/Odin.git
synced 2026-02-13 14:53:34 +00:00
Add iterator for core:text/regex.
Usage:
```odin
haystack := `xxfoobarxfoobarxx`
pattern := `f(o)ob(ar)`
it := regex.create_iterator(haystack, pattern, {.Global}) or_return
defer regex.destroy(it)
for capture in regex.match(&it) {
fmt.println(capture)
}
```
This commit is contained in:
@@ -8,6 +8,7 @@ package regex
|
||||
Feoramund: Initial implementation.
|
||||
*/
|
||||
|
||||
import "base:runtime"
|
||||
import "core:text/regex/common"
|
||||
import "core:text/regex/compiler"
|
||||
import "core:text/regex/optimizer"
|
||||
@@ -51,7 +52,7 @@ This struct corresponds to a set of string captures from a RegEx match.
|
||||
such that `str[pos[0][0]:pos[0][1]] == groups[0]`.
|
||||
*/
|
||||
Capture :: struct {
	// Start/end index pairs into the searched string, one pair per group;
	// `str[pos[0][0]:pos[0][1]] == groups[0]`.
	pos:    [][2]int,
	// The captured substrings, index-aligned with `pos`.
	groups: []string,
}
|
||||
|
||||
@@ -59,11 +60,21 @@ Capture :: struct {
|
||||
A compiled Regular Expression value, to be used with the `match_*` procedures.
|
||||
*/
|
||||
Regular_Expression :: struct {
	// The flags this expression carries.
	flags:      Flags `fmt:"-"`,
	// Rune class data referenced by the program.
	// NOTE(review): presumably indexed by class opcodes - confirm in the virtual machine.
	class_data: []virtual_machine.Rune_Class_Data `fmt:"-"`,
	// The opcode sequence making up the compiled expression.
	program:    []virtual_machine.Opcode `fmt:"-"`,
}
|
||||
|
||||
/*
Iteration state for matching one pattern against one string repeatedly.

Build it with `create_iterator`, advance it with `match_iterator`, and release
it with `destroy_iterator`.
*/
Match_Iterator :: struct {
	// The string being searched.
	haystack: string,
	// Index into `haystack` at which the next match attempt starts.
	offset:   int,
	// The compiled pattern.
	regex:    Regular_Expression,
	// Preallocated capture storage, reused by every iteration.
	capture:  Capture,
	// Allocator handed to the matcher on every iteration.
	temp:     runtime.Allocator,
}
|
||||
|
||||
/*
|
||||
Create a regular expression from a string pattern and a set of flags.
|
||||
@@ -245,6 +256,38 @@ create_by_user :: proc(
|
||||
return create(pattern[start:end], flags, permanent_allocator, temporary_allocator)
|
||||
}
|
||||
|
||||
/*
Create a `Match_Iterator` using a string to search, a regular expression to match against it, and a set of flags.

*Allocates Using Provided Allocators*

Inputs:
- str: The string to iterate over.
- pattern: The pattern to match.
- flags: A `bit_set` of RegEx flags.
- permanent_allocator: The allocator to use for the compiled regular expression and the iterator's capture buffers. (default: context.allocator)
- temporary_allocator: The allocator to use for the intermediate compilation and iteration stages. (default: context.temp_allocator)

Returns:
- result: The `Match_Iterator`. Left zero-valued when `err` is set.
- err: An error, if one occurred.
*/
create_iterator :: proc(
	str: string,
	pattern: string,
	flags: Flags = {},
	permanent_allocator := context.allocator,
	temporary_allocator := context.temp_allocator,
) -> (result: Match_Iterator, err: Error) {
	// Compile first, so that a failed compilation returns an untouched iterator.
	compiled := create(pattern, flags, permanent_allocator, temporary_allocator) or_return

	result.haystack = str
	result.regex    = compiled
	// The capture buffers live as long as the compiled expression and are freed
	// together with it by `destroy_iterator`, so both must share one allocator.
	result.capture  = preallocate_capture(permanent_allocator)
	result.temp     = temporary_allocator

	return
}
|
||||
|
||||
/*
|
||||
Match a regular expression against a string and allocate the results into the
|
||||
returned `capture` structure.
|
||||
@@ -387,9 +430,40 @@ match_with_preallocated_capture :: proc(
|
||||
return
|
||||
}
|
||||
|
||||
/*
Iterate over a `Match_Iterator` and return successive captures.

Each call matches against `haystack[offset:]`, re-bases the reported positions
so they index into the full haystack, and moves `offset` to the end of the
match. After a zero-width match the offset is additionally moved past one
UTF-8 encoded character, so that the iteration always makes progress.

The returned `Capture` is a view into the iterator's own capture buffers; copy
anything that must outlive the next call.

Inputs:
- it: Pointer to the `Match_Iterator` to iterate over.

Returns:
- result: `Capture` for this iteration.
- ok: A bool indicating if there was a match, stopping the iteration on `false`.
*/
match_iterator :: proc(it: ^Match_Iterator) -> (result: Capture, ok: bool) {
	// An offset past the end marks an exhausted iterator. It is only reached
	// after a zero-width match at the very end of the haystack (see below).
	if it.offset > len(it.haystack) {
		return
	}

	runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
	num_groups: int
	num_groups, ok = match_with_preallocated_capture(
		it.regex,
		it.haystack[it.offset:],
		&it.capture,
		it.temp,
	)

	if num_groups > 0 {
		// Positions are relative to the slice that was searched; shift them so
		// that they index into the whole haystack.
		for i in 0..<num_groups {
			it.capture.pos[i] += it.offset
		}

		match_start := it.capture.pos[0][0]
		match_end   := it.capture.pos[0][1]
		it.offset = match_end

		if match_end == match_start {
			// A zero-width match would otherwise be found again at the same
			// offset forever. Step over one byte, then over any UTF-8
			// continuation bytes (0b10xx_xxxx) that belong to it.
			it.offset += 1
			for it.offset < len(it.haystack) && (it.haystack[it.offset] & 0xC0) == 0x80 {
				it.offset += 1
			}
		}

		result = {it.capture.pos[:num_groups], it.capture.groups[:num_groups]}
	}
	return
}
|
||||
|
||||
// Procedure group: `match` resolves by argument types to one of the members
// below; passing a `^Match_Iterator` selects `match_iterator`.
match :: proc {
	match_and_allocate_capture,
	match_with_preallocated_capture,
	match_iterator,
}
|
||||
|
||||
/*
|
||||
@@ -406,7 +480,7 @@ Returns:
|
||||
@require_results
preallocate_capture :: proc(allocator := context.allocator) -> (result: Capture) {
	// Route both `make` calls below through the requested allocator.
	context.allocator = allocator
	// One slot per possible capture group, allocated exactly once each.
	result.pos    = make([][2]int, common.MAX_CAPTURE_GROUPS)
	result.groups = make([]string, common.MAX_CAPTURE_GROUPS)
	return
}
|
||||
@@ -436,7 +510,7 @@ Free all data allocated by the `match_and_allocate_capture` procedure.
|
||||
*Frees Using Provided Allocator*
|
||||
|
||||
Inputs:
|
||||
- capture: A Capture.
|
||||
- capture: A `Capture`.
|
||||
- allocator: (default: context.allocator)
|
||||
*/
|
||||
destroy_capture :: proc(capture: Capture, allocator := context.allocator) {
|
||||
@@ -445,7 +519,23 @@ destroy_capture :: proc(capture: Capture, allocator := context.allocator) {
|
||||
delete(capture.pos)
|
||||
}
|
||||
|
||||
/*
Release everything a `Match_Iterator` owns: its compiled regular expression
and its capture buffers.

*Frees Using Provided Allocator*

Inputs:
- it: The `Match_Iterator` to release.
- allocator: The allocator both parts are freed with. (default: context.allocator)
*/
destroy_iterator :: proc(it: Match_Iterator, allocator := context.allocator) {
	// Both calls below pick their allocator up from the context.
	context.allocator = allocator
	destroy_regex(it.regex)
	destroy_capture(it.capture)
}
|
||||
|
||||
// Procedure group: `destroy` resolves by argument type to the matching
// destructor for a regular expression, a capture, or a match iterator.
destroy :: proc {
	destroy_regex,
	destroy_capture,
	destroy_iterator,
}
|
||||
|
||||
@@ -72,6 +72,18 @@ expect_error :: proc(t: ^testing.T, pattern: string, expected_error: typeid, fla
|
||||
testing.expect_value(t, variant_ti, expected_ti, loc = loc)
|
||||
}
|
||||
|
||||
// Compare a capture against the expected one, reporting every difference
// through `testing.expect_value` at the caller's location.
//
// Inputs:
// - t: The test context.
// - got: The capture produced by the code under test.
// - expected: The capture it should equal.
// - loc: Source location to attribute failures to. (default: the caller)
check_capture :: proc(t: ^testing.T, got, expected: regex.Capture, loc := #caller_location) {
	testing.expect_value(t, len(got.pos),    len(got.groups),      loc = loc)
	testing.expect_value(t, len(got.pos),    len(expected.pos),    loc = loc)
	testing.expect_value(t, len(got.groups), len(expected.groups), loc = loc)

	// Each slice is only walked when its own length matches the expectation,
	// so a malformed capture is reported as a failure, not an out-of-bounds index.
	if len(got.pos) == len(expected.pos) {
		for i in 0..<len(got.pos) {
			testing.expect_value(t, got.pos[i], expected.pos[i], loc = loc)
		}
	}
	if len(got.groups) == len(expected.groups) {
		for i in 0..<len(got.groups) {
			testing.expect_value(t, got.groups[i], expected.groups[i], loc = loc)
		}
	}
}
|
||||
|
||||
@test
|
||||
test_concatenation :: proc(t: ^testing.T) {
|
||||
@@ -1080,3 +1092,48 @@ test_preallocated_capture :: proc(t: ^testing.T) {
|
||||
testing.expect_value(t, groups, "")
|
||||
}
|
||||
}
|
||||
|
||||
// One iterator test vector: the inputs for `regex.create_iterator` and the
// captures the iteration is expected to yield, in order.
Iterator_Test :: struct {
	haystack: string,          // The string to search.
	pattern:  string,          // The pattern to compile.
	flags:    regex.Flags,     // Flags to compile the pattern with.
	expected: []regex.Capture, // One entry per expected match.
}
|
||||
|
||||
// Test vectors for `test_match_iterator`. Positions are indices into the whole
// haystack; `pos[0]`/`groups[0]` describe the full match.
iterator_vectors := []Iterator_Test{
	// A single group wrapping the entire pattern: the group equals the full match.
	{
		`xxab32ab52xx`, `(ab\d{1})`, {.Global},
		{
			{pos = {{2, 5}, {2, 5}}, groups = {"ab3", "ab3"}},
			{pos = {{6, 9}, {6, 9}}, groups = {"ab5", "ab5"}},
		},
	},
	// Two groups inside a longer match, found twice in the haystack.
	{
		`xxfoobarxfoobarxx`, `f(o)ob(ar)`, {.Global},
		{
			{pos = {{2, 8}, {3, 4}, {6, 8}}, groups = {"foobar", "o", "ar"}},
			{pos = {{9, 15}, {10, 11}, {13, 15}}, groups = {"foobar", "o", "ar"}},
		},
	},
}
|
||||
|
||||
// Runs every entry of `iterator_vectors` through a match iterator and checks
// that it yields exactly the expected captures, in order.
@test
test_match_iterator :: proc(t: ^testing.T) {
	for test in iterator_vectors {
		it, err := regex.create_iterator(test.haystack, test.pattern, test.flags)
		defer regex.destroy(it)

		testing.expect_value(t, err, nil)
		(err == nil) or_continue

		count: int
		for capture in regex.match(&it) {
			if count >= len(test.expected) {
				// A surplus match: count it so the final check fails, but
				// stop before indexing past the end of `test.expected`.
				count += 1
				break
			}
			check_capture(t, capture, test.expected[count])
			count += 1
		}
		testing.expect_value(t, count, len(test.expected))
	}
}
|
||||
Reference in New Issue
Block a user