mirror of
https://github.com/odin-lang/Odin.git
synced 2026-06-10 12:28:11 +00:00
Add benchmarks for core:text/regex
This commit is contained in:
@@ -2,3 +2,4 @@ package benchmarks
|
||||
|
||||
@(require) import "crypto"
|
||||
@(require) import "hash"
|
||||
@(require) import "text/regex"
|
||||
|
||||
258
tests/benchmark/text/regex/benchmark_regex.odin
Normal file
258
tests/benchmark/text/regex/benchmark_regex.odin
Normal file
@@ -0,0 +1,258 @@
|
||||
package benchmark_core_text_regex
|
||||
|
||||
import "core:fmt"
|
||||
import "core:log"
|
||||
import "core:math/rand"
|
||||
import "core:mem"
|
||||
import "core:testing"
|
||||
import "core:text/regex"
|
||||
import "core:time"
|
||||
import "core:unicode/utf8"
|
||||
|
||||
randomize_ascii :: proc(data: []u8) {
|
||||
for i in 0..<len(data) {
|
||||
data[i] = ' ' + cast(u8)rand.int_max(0x7F - ' ')
|
||||
}
|
||||
}
|
||||
|
||||
randomize_unicode :: proc(data: []u8) {
|
||||
for i := 0; i < len(data); /**/ {
|
||||
check_rune_loop: for {
|
||||
r := cast(rune)rand.int_max(utf8.MAX_RUNE)
|
||||
if !utf8.valid_rune(r) {
|
||||
continue
|
||||
}
|
||||
if utf8.rune_size(r) > len(data) - i {
|
||||
continue
|
||||
}
|
||||
|
||||
r_data, size := utf8.encode_rune(r)
|
||||
for j in 0..<size {
|
||||
data[i+j] = r_data[j]
|
||||
}
|
||||
|
||||
i += size
|
||||
break check_rune_loop
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sizes := [?]int {
|
||||
2 * mem.Kilobyte,
|
||||
32 * mem.Kilobyte,
|
||||
64 * mem.Kilobyte,
|
||||
256 * mem.Kilobyte,
|
||||
0.50 * mem.Megabyte,
|
||||
1.00 * mem.Megabyte,
|
||||
2.00 * mem.Megabyte,
|
||||
}
|
||||
|
||||
@test
|
||||
expensive_for_backtrackers :: proc(t: ^testing.T) {
|
||||
counts := [?]int {
|
||||
8,
|
||||
16,
|
||||
32,
|
||||
64,
|
||||
}
|
||||
|
||||
report: string
|
||||
|
||||
for count in counts {
|
||||
data := make([]u8, count)
|
||||
pattern := make([]u8, 2 * count + count)
|
||||
defer {
|
||||
delete(data)
|
||||
delete(pattern)
|
||||
}
|
||||
for i in 0..<2 * count {
|
||||
pattern[i] = 'a' if i & 1 == 0 else '?'
|
||||
}
|
||||
for i in 2 * count..<2 * count + count {
|
||||
pattern[i] = 'a'
|
||||
}
|
||||
for i in 0..<count {
|
||||
data[i] = 'a'
|
||||
}
|
||||
|
||||
rex, err := regex.create(cast(string)pattern)
|
||||
if !testing.expect_value(t, err, nil) {
|
||||
return
|
||||
}
|
||||
defer regex.destroy(rex)
|
||||
|
||||
str := cast(string)data
|
||||
|
||||
log.debug(rex, str)
|
||||
|
||||
start := time.now()
|
||||
capture, ok := regex.match(rex, str)
|
||||
done := time.since(start)
|
||||
defer regex.destroy(capture)
|
||||
|
||||
if !testing.expect_value(t, ok, true) {
|
||||
continue
|
||||
}
|
||||
testing.expect_value(t, capture.pos[0], [2]int{0, count})
|
||||
|
||||
rate := cast(int)(cast(f64)(count / 2) / (cast(f64)done / 1e9))
|
||||
report = fmt.tprintf("%s\n +++ [%i : %v : %M/s] Matched `a?^%ia^%i` against `a^%i`.", report, count, done, rate, count, count, count)
|
||||
}
|
||||
log.info(report)
|
||||
}
|
||||
|
||||
@test
|
||||
global_capture_end_word :: proc(t: ^testing.T) {
|
||||
EXPR :: `Hellope World!`
|
||||
|
||||
rex, err := regex.create(EXPR, { .Global })
|
||||
if !testing.expect_value(t, err, nil) {
|
||||
return
|
||||
}
|
||||
defer regex.destroy(rex)
|
||||
|
||||
report := fmt.tprintf("Matching %v over a block of random ASCII text.", rex)
|
||||
|
||||
for size in sizes {
|
||||
data := make([]u8, size)
|
||||
defer delete(data)
|
||||
randomize_ascii(data[:])
|
||||
|
||||
for r, i in EXPR {
|
||||
data[len(data) - len(EXPR) + i] = cast(u8)r
|
||||
}
|
||||
|
||||
str := cast(string)data
|
||||
|
||||
start := time.now()
|
||||
capture, ok := regex.match(rex, str)
|
||||
done := time.since(start)
|
||||
defer regex.destroy(capture)
|
||||
|
||||
if !testing.expect_value(t, ok, true) {
|
||||
continue
|
||||
}
|
||||
testing.expect_value(t, capture.pos[0], [2]int{size - len(EXPR), size})
|
||||
|
||||
rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
|
||||
report = fmt.tprintf("%s\n +++ [%M : %v : %M/s]", report, size, done, rate)
|
||||
}
|
||||
log.info(report)
|
||||
}
|
||||
|
||||
@test
|
||||
global_capture_end_word_unicode :: proc(t: ^testing.T) {
|
||||
EXPR :: `こにちは`
|
||||
needle := string(EXPR)
|
||||
|
||||
rex, err := regex.create(EXPR, { .Global, .Unicode })
|
||||
if !testing.expect_value(t, err, nil) {
|
||||
return
|
||||
}
|
||||
defer regex.destroy(rex)
|
||||
|
||||
report := fmt.tprintf("Matching %v over a block of random Unicode text.", rex)
|
||||
|
||||
for size in sizes {
|
||||
data := make([]u8, size)
|
||||
defer delete(data)
|
||||
randomize_unicode(data[:size - len(needle)])
|
||||
|
||||
for i := 0; i < len(needle); i += 1 {
|
||||
data[len(data) - len(needle) + i] = needle[i]
|
||||
}
|
||||
|
||||
str := cast(string)data
|
||||
|
||||
start := time.now()
|
||||
capture, ok := regex.match(rex, str)
|
||||
done := time.since(start)
|
||||
defer regex.destroy(capture)
|
||||
|
||||
if !testing.expect_value(t, ok, true) {
|
||||
continue
|
||||
}
|
||||
testing.expect_value(t, capture.groups[0], needle)
|
||||
|
||||
rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
|
||||
report = fmt.tprintf("%s\n +++ [%M : %v : %M/s]", report, size, done, rate)
|
||||
}
|
||||
log.info(report)
|
||||
}
|
||||
|
||||
|
||||
@test
|
||||
alternations :: proc(t: ^testing.T) {
|
||||
EXPR :: `a(?:bb|cc|dd|ee|ff)`
|
||||
|
||||
rex, err := regex.create(EXPR, { .No_Capture, .Global })
|
||||
if !testing.expect_value(t, err, nil) {
|
||||
return
|
||||
}
|
||||
defer regex.destroy(rex)
|
||||
|
||||
report := fmt.tprintf("Matching %v over a text block of only `a`s.", rex)
|
||||
|
||||
for size in sizes {
|
||||
data := make([]u8, size)
|
||||
defer delete(data)
|
||||
for i in 0..<size {
|
||||
data[i] = 'a'
|
||||
}
|
||||
|
||||
str := cast(string)data
|
||||
|
||||
start := time.now()
|
||||
_, ok := regex.match(rex, str)
|
||||
done := time.since(start)
|
||||
|
||||
testing.expect_value(t, ok, false)
|
||||
|
||||
rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
|
||||
report = fmt.tprintf("%s\n +++ [%M : %v : %M/s]", report, size, done, rate)
|
||||
}
|
||||
log.info(report)
|
||||
}
|
||||
|
||||
@test
|
||||
classes :: proc(t: ^testing.T) {
|
||||
EXPR :: `[\w\d]+`
|
||||
NEEDLE :: "0123456789abcdef"
|
||||
|
||||
rex, err := regex.create(EXPR, { .Global })
|
||||
if !testing.expect_value(t, err, nil) {
|
||||
return
|
||||
}
|
||||
defer regex.destroy(rex)
|
||||
|
||||
report := fmt.tprintf("Matching %v over a string of spaces with %q at the end.", rex, NEEDLE)
|
||||
|
||||
for size in sizes {
|
||||
data := make([]u8, size)
|
||||
defer delete(data)
|
||||
|
||||
for i in 0..<size {
|
||||
data[i] = ' '
|
||||
}
|
||||
|
||||
for r, i in NEEDLE {
|
||||
data[len(data) - len(NEEDLE) + i] = cast(u8)r
|
||||
}
|
||||
|
||||
str := cast(string)data
|
||||
|
||||
start := time.now()
|
||||
capture, ok := regex.match(rex, str)
|
||||
done := time.since(start)
|
||||
defer regex.destroy(capture)
|
||||
|
||||
if !testing.expect_value(t, ok, true) {
|
||||
continue
|
||||
}
|
||||
testing.expect_value(t, capture.pos[0], [2]int{size - len(NEEDLE), size})
|
||||
|
||||
rate := cast(int)(cast(f64)size / (cast(f64)done / 1e9))
|
||||
report = fmt.tprintf("%s\n +++ [%M : %v : %M/s]", report, size, done, rate)
|
||||
}
|
||||
log.info(report)
|
||||
}
|
||||
Reference in New Issue
Block a user