Add benchmarks for core:text/regex

This commit is contained in:
Feoramund
2024-07-21 23:20:32 -04:00
parent 3e49ceb82a
commit be38ba6c5e
2 changed files with 259 additions and 0 deletions

View File

@@ -2,3 +2,4 @@ package benchmarks
@(require) import "crypto"
@(require) import "hash"
@(require) import "text/regex"

View File

@@ -0,0 +1,258 @@
package benchmark_core_text_regex
import "core:fmt"
import "core:log"
import "core:math/rand"
import "core:mem"
import "core:testing"
import "core:text/regex"
import "core:time"
import "core:unicode/utf8"
randomize_ascii :: proc(data: []u8) {
for i in 0..<len(data) {
data[i] = ' ' + cast(u8)rand.int_max(0x7F - ' ')
}
}
randomize_unicode :: proc(data: []u8) {
for i := 0; i < len(data); /**/ {
check_rune_loop: for {
r := cast(rune)rand.int_max(utf8.MAX_RUNE)
if !utf8.valid_rune(r) {
continue
}
if utf8.rune_size(r) > len(data) - i {
continue
}
r_data, size := utf8.encode_rune(r)
for j in 0..<size {
data[i+j] = r_data[j]
}
i += size
break check_rune_loop
}
}
}
sizes := [?]int {
2 * mem.Kilobyte,
32 * mem.Kilobyte,
64 * mem.Kilobyte,
256 * mem.Kilobyte,
0.50 * mem.Megabyte,
1.00 * mem.Megabyte,
2.00 * mem.Megabyte,
}
@test
expensive_for_backtrackers :: proc(t: ^testing.T) {
counts := [?]int {
8,
16,
32,
64,
}
report: string
for count in counts {
data := make([]u8, count)
pattern := make([]u8, 2 * count + count)
defer {
delete(data)
delete(pattern)
}
for i in 0..<2 * count {
pattern[i] = 'a' if i & 1 == 0 else '?'
}
for i in 2 * count..<2 * count + count {
pattern[i] = 'a'
}
for i in 0..<count {
data[i] = 'a'
}
rex, err := regex.create(cast(string)pattern)
if !testing.expect_value(t, err, nil) {
return
}
defer regex.destroy(rex)
str := cast(string)data
log.debug(rex, str)
start := time.now()
capture, ok := regex.match(rex, str)
done := time.since(start)
defer regex.destroy(capture)
if !testing.expect_value(t, ok, true) {
continue
}
testing.expect_value(t, capture.pos[0], [2]int{0, count})
rate := cast(int)(cast(f64)(count / 2) / (cast(f64)done / 1e9))
report = fmt.tprintf("%s\n +++ [%i : %v : %M/s] Matched `a?^%ia^%i` against `a^%i`.", report, count, done, rate, count, count, count)
}
log.info(report)
}
@test
global_capture_end_word :: proc(t: ^testing.T) {
EXPR :: `Hellope World!`
rex, err := regex.create(EXPR, { .Global })
if !testing.expect_value(t, err, nil) {
return
}
defer regex.destroy(rex)
report := fmt.tprintf("Matching %v over a block of random ASCII text.", rex)
for size in sizes {
data := make([]u8, size)
defer delete(data)
randomize_ascii(data[:])
for r, i in EXPR {
data[len(data) - len(EXPR) + i] = cast(u8)r
}
str := cast(string)data
start := time.now()
capture, ok := regex.match(rex, str)
done := time.since(start)
defer regex.destroy(capture)
if !testing.expect_value(t, ok, true) {
continue
}
testing.expect_value(t, capture.pos[0], [2]int{size - len(EXPR), size})
rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
report = fmt.tprintf("%s\n +++ [%M : %v : %M/s]", report, size, done, rate)
}
log.info(report)
}
@test
global_capture_end_word_unicode :: proc(t: ^testing.T) {
EXPR :: `こにちは`
needle := string(EXPR)
rex, err := regex.create(EXPR, { .Global, .Unicode })
if !testing.expect_value(t, err, nil) {
return
}
defer regex.destroy(rex)
report := fmt.tprintf("Matching %v over a block of random Unicode text.", rex)
for size in sizes {
data := make([]u8, size)
defer delete(data)
randomize_unicode(data[:size - len(needle)])
for i := 0; i < len(needle); i += 1 {
data[len(data) - len(needle) + i] = needle[i]
}
str := cast(string)data
start := time.now()
capture, ok := regex.match(rex, str)
done := time.since(start)
defer regex.destroy(capture)
if !testing.expect_value(t, ok, true) {
continue
}
testing.expect_value(t, capture.groups[0], needle)
rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
report = fmt.tprintf("%s\n +++ [%M : %v : %M/s]", report, size, done, rate)
}
log.info(report)
}
@test
alternations :: proc(t: ^testing.T) {
EXPR :: `a(?:bb|cc|dd|ee|ff)`
rex, err := regex.create(EXPR, { .No_Capture, .Global })
if !testing.expect_value(t, err, nil) {
return
}
defer regex.destroy(rex)
report := fmt.tprintf("Matching %v over a text block of only `a`s.", rex)
for size in sizes {
data := make([]u8, size)
defer delete(data)
for i in 0..<size {
data[i] = 'a'
}
str := cast(string)data
start := time.now()
_, ok := regex.match(rex, str)
done := time.since(start)
testing.expect_value(t, ok, false)
rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
report = fmt.tprintf("%s\n +++ [%M : %v : %M/s]", report, size, done, rate)
}
log.info(report)
}
@test
classes :: proc(t: ^testing.T) {
EXPR :: `[\w\d]+`
NEEDLE :: "0123456789abcdef"
rex, err := regex.create(EXPR, { .Global })
if !testing.expect_value(t, err, nil) {
return
}
defer regex.destroy(rex)
report := fmt.tprintf("Matching %v over a string of spaces with %q at the end.", rex, NEEDLE)
for size in sizes {
data := make([]u8, size)
defer delete(data)
for i in 0..<size {
data[i] = ' '
}
for r, i in NEEDLE {
data[len(data) - len(NEEDLE) + i] = cast(u8)r
}
str := cast(string)data
start := time.now()
capture, ok := regex.match(rex, str)
done := time.since(start)
defer regex.destroy(capture)
if !testing.expect_value(t, ok, true) {
continue
}
testing.expect_value(t, capture.pos[0], [2]int{size - len(NEEDLE), size})
rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
report = fmt.tprintf("%s\n +++ [%M : %v : %M/s]", report, size, done, rate)
}
log.info(report)
}