mirror of
https://github.com/odin-lang/Odin.git
synced 2025-12-29 09:24:33 +00:00
Merge branch 'master' into compiler-improvements-2022-12
This commit is contained in:
4
.github/workflows/stale.yml
vendored
4
.github/workflows/stale.yml
vendored
@@ -13,7 +13,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Close Stale Issues
|
||||
uses: actions/stale@v4.1.0
|
||||
uses: actions/stale@v7.0.0
|
||||
with:
|
||||
# stale-issue-message: |
|
||||
# Hello!
|
||||
@@ -36,7 +36,7 @@ jobs:
|
||||
# The motivation for this automation is to help prioritize issues in the backlog and not ignore, reject, or belittle anyone..
|
||||
|
||||
days-before-stale: 120
|
||||
days-before-close: 30
|
||||
days-before-close: -1
|
||||
exempt-draft-pr: true
|
||||
ascending: true
|
||||
operations-per-run: 1000
|
||||
|
||||
@@ -153,7 +153,7 @@ scratch_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
|
||||
s := (^Scratch_Allocator)(allocator_data)
|
||||
|
||||
if s.data == nil {
|
||||
DEFAULT_BACKING_SIZE :: 1<<22
|
||||
DEFAULT_BACKING_SIZE :: 4 * Megabyte
|
||||
if !(context.allocator.procedure != scratch_allocator_proc &&
|
||||
context.allocator.data != allocator_data) {
|
||||
panic("cyclic initialization of the scratch allocator with itself")
|
||||
|
||||
@@ -3,11 +3,11 @@ package mem
|
||||
import "core:runtime"
|
||||
import "core:intrinsics"
|
||||
|
||||
Byte :: 1
|
||||
Kilobyte :: 1024 * Byte
|
||||
Megabyte :: 1024 * Kilobyte
|
||||
Gigabyte :: 1024 * Megabyte
|
||||
Terabyte :: 1024 * Gigabyte
|
||||
Byte :: runtime.Byte
|
||||
Kilobyte :: runtime.Kilobyte
|
||||
Megabyte :: runtime.Megabyte
|
||||
Gigabyte :: runtime.Gigabyte
|
||||
Terabyte :: runtime.Terabyte
|
||||
|
||||
set :: proc "contextless" (data: rawptr, value: byte, len: int) -> rawptr {
|
||||
return runtime.memset(data, i32(value), len)
|
||||
|
||||
@@ -19,11 +19,11 @@ Arena :: struct {
|
||||
|
||||
|
||||
// 1 MiB should be enough to start with
|
||||
DEFAULT_ARENA_STATIC_COMMIT_SIZE :: 1<<20
|
||||
DEFAULT_ARENA_STATIC_COMMIT_SIZE :: mem.Megabyte
|
||||
DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE :: DEFAULT_ARENA_STATIC_COMMIT_SIZE
|
||||
|
||||
// 1 GiB on 64-bit systems, 128 MiB on 32-bit systems by default
|
||||
DEFAULT_ARENA_STATIC_RESERVE_SIZE :: 1<<30 when size_of(uintptr) == 8 else 1<<27
|
||||
DEFAULT_ARENA_STATIC_RESERVE_SIZE :: mem.Gigabyte when size_of(uintptr) == 8 else 128 * mem.Megabyte
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -14,11 +14,12 @@ read_dir :: proc(fd: Handle, n: int, allocator := context.allocator) -> (fi: []F
|
||||
|
||||
dirpath: string
|
||||
dirpath, err = absolute_path_from_handle(fd)
|
||||
|
||||
if err != ERROR_NONE {
|
||||
return
|
||||
}
|
||||
|
||||
defer delete(dirpath)
|
||||
|
||||
n := n
|
||||
size := n
|
||||
if n <= 0 {
|
||||
|
||||
@@ -342,21 +342,33 @@ get_last_error_string :: proc() -> string {
|
||||
}
|
||||
|
||||
open :: proc(path: string, flags: int = O_RDWR, mode: int = 0) -> (Handle, Errno) {
|
||||
isDir := is_dir_path(path)
|
||||
flags := flags
|
||||
if isDir {
|
||||
/*
|
||||
@INFO(Platin): To make it impossible to use the wrong flag for dir's
|
||||
as you can't write to a dir only read which makes it fail to open
|
||||
*/
|
||||
flags = O_RDONLY
|
||||
}
|
||||
|
||||
cstr := strings.clone_to_cstring(path, context.temp_allocator)
|
||||
handle := _unix_open(cstr, i32(flags), u16(mode))
|
||||
if handle == -1 {
|
||||
return INVALID_HANDLE, 1
|
||||
return INVALID_HANDLE, cast(Errno)get_last_error()
|
||||
}
|
||||
|
||||
when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 {
|
||||
if mode != 0 {
|
||||
/*
|
||||
@INFO(Platin): this is only done because O_CREATE for some reason fails to apply mode
|
||||
should not happen if the handle is a directory
|
||||
*/
|
||||
if mode != 0 && !isDir {
|
||||
err := fchmod(handle, cast(u16)mode)
|
||||
if err != 0 {
|
||||
_unix_close(handle)
|
||||
return INVALID_HANDLE, 1
|
||||
return INVALID_HANDLE, cast(Errno)err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return handle, 0
|
||||
}
|
||||
|
||||
@@ -329,6 +329,12 @@ Allocator :: struct {
|
||||
data: rawptr,
|
||||
}
|
||||
|
||||
Byte :: 1
|
||||
Kilobyte :: 1024 * Byte
|
||||
Megabyte :: 1024 * Kilobyte
|
||||
Gigabyte :: 1024 * Megabyte
|
||||
Terabyte :: 1024 * Gigabyte
|
||||
|
||||
// Logging stuff
|
||||
|
||||
Logger_Level :: enum uint {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
package runtime
|
||||
|
||||
DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 1<<22)
|
||||
DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 4 * Megabyte)
|
||||
|
||||
|
||||
when ODIN_OS == .Freestanding || ODIN_OS == .JS || ODIN_DEFAULT_TO_NIL_ALLOCATOR {
|
||||
@@ -197,4 +197,4 @@ default_temp_allocator :: proc(allocator: ^Default_Temp_Allocator) -> Allocator
|
||||
procedure = default_temp_allocator_proc,
|
||||
data = allocator,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
959
core/text/match/strlib.odin
Normal file
959
core/text/match/strlib.odin
Normal file
@@ -0,0 +1,959 @@
|
||||
package text_match
|
||||
|
||||
import "core:runtime"
|
||||
import "core:unicode"
|
||||
import "core:unicode/utf8"
|
||||
import "core:strings"
|
||||
|
||||
// Number of capture slots tracked by a Match_State and exposed through [MAX_CAPTURES]Match arrays.
MAX_CAPTURES :: 32

// A capture recorded while matching.
// `init` is the byte index into the source where the capture starts.
// `len` is its byte length, or CAP_UNFINISHED / CAP_POSITION while no length applies.
Capture :: struct {
	init: int,
	len:  int,
}

// Byte range of a match (or capture) inside the searched string.
Match :: struct {
	byte_start, byte_end: int,
}

Error :: enum {
	OK,
	OOB,                     // pattern ends right after the escape character
	Invalid_Capture_Index,   // back-reference or capture index that does not exist
	Invalid_Pattern_Capture, // e.g. ')' without an open capture, bad %b / %f usage
	Unfinished_Capture,      // capture was opened but never closed
	Malformed_Pattern,       // unterminated escape or character set
	Rune_Error,              // UTF-8 decoding produced utf8.RUNE_ERROR
	Match_Invalid,           // the match did not advance (empty match)
}

// escape character used inside patterns
L_ESC          :: '%'
// Capture.len sentinel: position capture "()"
CAP_POSITION   :: -2
// Capture.len sentinel: capture opened but not closed yet
CAP_UNFINISHED :: -1
// returned in place of a source index when nothing matched
INVALID        :: -1

// State shared by the recursive matching procedures.
Match_State :: struct {
	src:     string,                // text being searched
	pattern: string,                // pattern being applied
	level:   int,                   // number of captures currently opened or finished
	capture: [MAX_CAPTURES]Capture,
}
|
||||
|
||||
// Test the rune `c` against the class letter `cl` that followed an escape in the pattern.
// A lower-case letter selects the class itself, an upper-case letter selects its complement.
// A rune that is not a class letter is compared literally against `c`.
match_class :: proc(c: rune, cl: rune) -> (res: bool) {
	in_class: bool

	switch unicode.to_lower(cl) {
	case 'a': in_class = is_alpha(c)
	case 'c': in_class = is_cntrl(c)
	case 'd': in_class = is_digit(c)
	case 'g': in_class = is_graph(c)
	case 'l': in_class = is_lower(c)
	case 'p': in_class = is_punct(c)
	case 's': in_class = is_space(c)
	case 'u': in_class = is_upper(c)
	case 'w': in_class = is_alnum(c)
	case 'x': in_class = is_xdigit(c)
	case:
		// not a class letter: escaped literal
		return c == cl
	}

	// upper-case class letters negate the class
	if is_lower(cl) {
		return in_class
	}
	return !in_class
}
|
||||
|
||||
is_alpha :: unicode.is_alpha
|
||||
is_digit :: unicode.is_digit
|
||||
is_lower :: unicode.is_lower
|
||||
is_upper :: unicode.is_upper
|
||||
is_punct :: unicode.is_punct
|
||||
is_space :: unicode.is_space
|
||||
is_cntrl :: unicode.is_control
|
||||
|
||||
// Reports whether `c` is alphanumeric: either a digit or an alphabetic rune.
is_alnum :: proc(c: rune) -> bool {
	if unicode.is_digit(c) {
		return true
	}
	return unicode.is_alpha(c)
}
|
||||
|
||||
// Reports whether `c` has a graphical representation: any rune that is neither
// a control character nor white space.
// The previous body was a copy of `is_xdigit`, so the `%g` class only matched hex digits.
is_graph :: proc(c: rune) -> bool {
	return !unicode.is_control(c) && !unicode.is_space(c)
}
|
||||
|
||||
// Reports whether `c` is a hexadecimal digit: a-f, A-F, or anything unicode.is_digit accepts.
is_xdigit :: proc(c: rune) -> bool {
	switch c {
	case 'a'..='f', 'A'..='F':
		return true
	}
	return unicode.is_digit(c)
}
|
||||
|
||||
// Decode the first UTF-8 character of `bytes` and report its size in bytes.
// `err` is .Rune_Error when the decoder yields utf8.RUNE_ERROR; `c` and `size`
// are still returned as decoded in that case.
utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) {
	c, size = utf8.decode_rune_in_string(bytes)
	err = .Rune_Error if c == utf8.RUNE_ERROR else .OK
	return
}
|
||||
|
||||
// Decode the UTF-8 character starting at `index^` in `bytes` and move `index^` past it.
// The index is advanced by the decoded size even when the character decodes to
// utf8.RUNE_ERROR, in which case `err` is .Rune_Error.
utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) {
	width: int
	c, width = utf8.decode_rune_in_string(bytes[index^:])
	index^ += width

	if c == utf8.RUNE_ERROR {
		return c, .Rune_Error
	}
	return c, .OK
}
|
||||
|
||||
// Reports whether `b` is a UTF-8 continuation byte, i.e. its top two bits are `10`.
is_cont :: proc(b: byte) -> bool {
	return (b >> 6) == 0b10
}
|
||||
|
||||
// Step back from byte index `b` to the start of the preceding UTF-8 character,
// never moving before the lower bound `a`. Returns `a` when there is nothing to step over.
utf8_prev :: proc(bytes: string, a, b: int) -> int {
	pos := b

	// walk over the continuation bytes (10xxxxxx) directly before `pos`
	for pos > a {
		if bytes[pos - 1] & 0xc0 != 0x80 {
			break
		}
		pos -= 1
	}

	if pos > a {
		return pos - 1
	}
	return a
}
|
||||
|
||||
// Step forward from byte index `a` past any continuation bytes that follow it and
// return the index after them, clamped to len(bytes).
utf8_next :: proc(bytes: string, a: int) -> int {
	end := len(bytes)
	pos := a

	// skip the continuation bytes (10xxxxxx) that follow `pos`
	for pos + 1 < end {
		if bytes[pos + 1] & 0xc0 != 0x80 {
			break
		}
		pos += 1
	}

	if pos < end {
		return pos + 1
	}
	return end
}
|
||||
|
||||
// Translate the digit rune of a back-reference (`%1` maps to capture 0) into a capture index.
// Returns .Invalid_Capture_Index when the index is out of range for the captures
// opened so far, or when that capture has not been closed yet.
check_capture :: proc(ms: ^Match_State, l: rune) -> (int, Error) {
	index := int(l - '1')

	if index < 0 || index >= ms.level {
		return 0, .Invalid_Capture_Index
	}
	if ms.capture[index].len == CAP_UNFINISHED {
		return 0, .Invalid_Capture_Index
	}

	return index, .OK
}
|
||||
|
||||
// Find the most recently opened capture that is still unfinished.
// Returns .Invalid_Pattern_Capture when there is no open capture.
capture_to_close :: proc(ms: ^Match_State) -> (int, Error) {
	for idx := ms.level - 1; idx >= 0; idx -= 1 {
		if ms.capture[idx].len == CAP_UNFINISHED {
			return idx, .OK
		}
	}

	return 0, .Invalid_Pattern_Capture
}
|
||||
|
||||
// Return the pattern index just past the single item (plain character, escape or
// `[...]` set) that starts at `p`.
// Returns .Malformed_Pattern for an escape or a set that is not terminated, and
// .Rune_Error when the pattern is not valid UTF-8 at `p`.
class_end :: proc(ms: ^Match_State, p: int) -> (step: int, err: Error) {
	step = p
	ch := utf8_advance(ms.pattern, &step) or_return

	switch ch {
	case L_ESC:
		if step == len(ms.pattern) {
			err = .Malformed_Pattern
			return
		}

		utf8_advance(ms.pattern, &step) or_return

	case '[':
		// fine with step by 1
		if step + 1 < len(ms.pattern) && ms.pattern[step] == '^' {
			step += 1
		}

		// run till the closing ']' is reached
		for {
			if step >= len(ms.pattern) {
				err = .Malformed_Pattern
				return
			}

			if ms.pattern[step] == ']' {
				break
			}

			if ms.pattern[step] == L_ESC {
				// step over the escape character so that the escaped byte, which
				// may be ']' itself (e.g. "[%]]"), is consumed below and cannot
				// terminate the set
				step += 1

				if step >= len(ms.pattern) {
					err = .Malformed_Pattern
					return
				}
			}

			// dont care about utf8 here: continuation bytes are never ']' or '%'
			step += 1
		}

		// advance last time, past the closing ']'
		step += 1
	}

	return
}
|
||||
|
||||
// Test the rune `c` against the set `[...]` whose '[' is at pattern index `p` and
// whose closing ']' is at pattern index `ec`.
// Returns true when `c` is accepted by the set, taking a leading '^' (negation) into account.
match_bracket_class :: proc(ms: ^Match_State, c: rune, p, ec: int) -> (sig: bool, err: Error) {
	sig = true

	// skip the opening '[' so that it is not treated as a member of the set
	p := p + 1

	if p < ec && ms.pattern[p] == '^' {
		// negated set: skip the '^' as well
		p += 1
		sig = false
	}

	// while inside of class range
	for p < ec {
		char := utf8_advance(ms.pattern, &p) or_return

		// e.g. %a
		if char == L_ESC {
			next := utf8_advance(ms.pattern, &p) or_return

			if match_class(c, next) {
				return
			}
		} else {
			next, next_size := utf8_peek(ms.pattern[p:]) or_return

			// a '-' only forms a range when an end character exists before the
			// closing ']'; otherwise (e.g. "[a-]") it is an ordinary member
			if next == '-' && p + next_size < ec {
				// advance 2 codepoints
				p += next_size
				last := utf8_advance(ms.pattern, &p) or_return

				if char <= c && c <= last {
					return
				}
			} else if char == c {
				return
			}
		}
	}

	// nothing in the set matched
	sig = !sig
	return
}
|
||||
|
||||
// Test the single source character at byte index `s` against the pattern item that
// starts at `p` and ends at `ep` (as computed by class_end).
// `schar_size` is the byte size of the source character that was examined.
// Returns `matched = false` without an error when `s` is at or past the end of the source.
single_match :: proc(ms: ^Match_State, s, p, ep: int) -> (matched: bool, schar_size: int, err: Error) {
	if s >= len(ms.src) {
		return
	}

	pattern_rune, pattern_size := utf8_peek(ms.pattern[p:]) or_return
	source_rune, source_size := utf8_peek(ms.src[s:]) or_return
	schar_size = source_size

	if pattern_rune == '.' {
		// wildcard
		matched = true
	} else if pattern_rune == L_ESC {
		// escaped class or literal: decided by the rune after the escape
		class_rune, _ := utf8_peek(ms.pattern[p + pattern_size:]) or_return
		matched = match_class(source_rune, class_rune)
	} else if pattern_rune == '[' {
		// `ep - 1` is the index of the closing ']'
		matched = match_bracket_class(ms, source_rune, p, ep - 1) or_return
	} else {
		matched = source_rune == pattern_rune
	}

	return
}
|
||||
|
||||
// Match a balanced run for `%bxy`: `p` is the pattern index of the opening delimiter x,
// directly followed by the closing delimiter y.
// Returns the source index just past the balanced run, or INVALID when the source
// does not start with x at `s` or the run is never closed.
match_balance :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
	if p >= len(ms.pattern) - 1 {
		return INVALID, .Invalid_Pattern_Capture
	}

	// nothing left in the source: report "no match" instead of letting the
	// decode of an empty string surface as .Rune_Error
	if s >= len(ms.src) {
		return INVALID, .OK
	}

	schar, ssize := utf8_peek(ms.src[s:]) or_return
	pchar, psize := utf8_peek(ms.pattern[p:]) or_return

	// the source has to start with the opening delimiter
	if schar != pchar {
		return INVALID, .OK
	}

	cont := 1 // nesting depth
	s := s + ssize
	begin := pchar
	end, _ := utf8_peek(ms.pattern[p + psize:]) or_return

	for s < len(ms.src) {
		ch := utf8_advance(ms.src, &s) or_return

		// `end` is tested first, so identical delimiters close immediately
		switch ch {
		case end:
			cont -= 1

			if cont == 0 {
				return s, .OK
			}

		case begin:
			cont += 1
		}
	}

	return INVALID, .OK
}
|
||||
|
||||
// Greedy repetition: consume as many source characters matching the item at `p..ep`
// as possible starting at `s`, then give characters back one at a time until the
// rest of the pattern (after the quantifier at `ep`) matches.
// Returns the end index of the overall match or INVALID.
max_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) {
	cursor := s

	// greedy phase: count up matches
	for {
		ok, width := single_match(ms, cursor, p, ep) or_return

		if !ok {
			break
		}

		cursor += width
	}

	// backtracking phase: try the rest of the pattern from the longest run downwards
	for cursor >= s {
		candidate := match(ms, cursor, ep + 1) or_return

		if candidate != INVALID {
			return candidate, .OK
		}

		if cursor == s {
			break
		}

		cursor = utf8_prev(ms.src, s, cursor)
	}

	return INVALID, .OK
}
|
||||
|
||||
// Lazy repetition: try the rest of the pattern (after the quantifier at `ep`) first
// and only consume one more character matching the item at `p..ep` when that fails.
// Returns the end index of the overall match or INVALID.
min_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) {
	cursor := s

	for {
		candidate := match(ms, cursor, ep + 1) or_return

		if candidate != INVALID {
			return candidate, .OK
		}

		// TODO receive next step maybe?
		ok, width := single_match(ms, cursor, p, ep) or_return

		if !ok {
			return INVALID, .OK
		}

		cursor += width
	}
}
|
||||
|
||||
// Open a new capture at source index `s` and continue matching at pattern index `p`.
// `what` is the initial length marker (CAP_UNFINISHED or CAP_POSITION).
// The capture is dropped again when the rest of the pattern does not match.
// Returns .Invalid_Pattern_Capture when the pattern opens more than MAX_CAPTURES captures.
start_capture :: proc(ms: ^Match_State, s, p, what: int) -> (res: int, err: Error) {
	level := ms.level

	// too many captures: report an error instead of indexing past `ms.capture`
	if level >= MAX_CAPTURES {
		return INVALID, .Invalid_Pattern_Capture
	}

	ms.capture[level].init = s
	ms.capture[level].len = what
	ms.level += 1

	res = match(ms, s, p) or_return
	if res == INVALID {
		// undo the capture
		ms.level -= 1
	}
	return
}
|
||||
|
||||
// Close the innermost open capture at source index `s` and continue matching at
// pattern index `p`. The capture is re-opened when the rest of the pattern does not match.
end_capture :: proc(ms: ^Match_State, s, p: int) -> (res: int, err: Error) {
	slot := capture_to_close(ms) or_return
	capture := &ms.capture[slot]

	// TODO double check, could do string as int index
	capture.len = s - capture.init

	res = match(ms, s, p) or_return
	if res == INVALID {
		capture.len = CAP_UNFINISHED
	}
	return
}
|
||||
|
||||
// Match a back-reference (`%1`, `%2`, ...): the source at `s` has to repeat the text
// of the referenced capture. `char` is the digit rune that followed the escape.
// Returns the source index after the repeated text, or INVALID when it does not repeat.
match_capture :: proc(ms: ^Match_State, s: int, char: rune) -> (res: int, err: Error) {
	index := check_capture(ms, char) or_return
	captured := ms.capture[index]

	// position captures (negative length marker) carry no text to compare against
	if captured.len < 0 {
		return INVALID, .OK
	}

	// not enough source left to hold the captured text
	if len(ms.src) - s < captured.len {
		return INVALID, .OK
	}

	// the previous version skipped this comparison, so any text of the right
	// length was accepted as a back-reference match
	if ms.src[captured.init:][:captured.len] != ms.src[s:][:captured.len] {
		return INVALID, .OK
	}

	return s + captured.len, .OK
}
|
||||
|
||||
// Core recursive matcher: try to match the pattern from index `p` against the source
// from byte index `s`.
// Returns the source index just past the match, or INVALID when there is no match.
// Handles captures, the `$` end anchor, `%b` (balanced), `%f` (frontier) and
// back-references itself; everything else is delegated to match_default.
match :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
	s := s
	p := p

	// end of pattern: everything matched
	if p == len(ms.pattern) {
		return s, .OK
	}

	// NOTE we can walk by ascii steps if we know the characters are ascii
	char, _ := utf8_peek(ms.pattern[p:]) or_return
	switch char {
	case '(':
		// "()" is a position capture, anything else opens a regular capture
		if p + 1 < len(ms.pattern) && ms.pattern[p + 1] == ')' {
			s = start_capture(ms, s, p + 2, CAP_POSITION) or_return
		} else {
			s = start_capture(ms, s, p + 1, CAP_UNFINISHED) or_return
		}

	case ')':
		s = end_capture(ms, s, p + 1) or_return

	case '$':
		// only an anchor when it is the last pattern character
		if p + 1 != len(ms.pattern) {
			return match_default(ms, s, p)
		}

		if len(ms.src) != s {
			s = INVALID
		}

	case L_ESC:
		// stop short patterns like "%" only
		if p + 1 >= len(ms.pattern) {
			err = .OOB
			return
		}

		switch ms.pattern[p + 1] {
		// balanced string
		case 'b':
			s = match_balance(ms, s, p + 2) or_return

			if s != INVALID {
				// continue after "%b" and both delimiter runes; their byte sizes
				// are measured so that non-ASCII delimiters are skipped correctly
				_, open_size := utf8_peek(ms.pattern[p + 2:]) or_return
				_, close_size := utf8_peek(ms.pattern[p + 2 + open_size:]) or_return
				return match(ms, s, p + 2 + open_size + close_size)
			}

		// frontier
		case 'f':
			p += 2

			// "%f" has to be followed by a set; also covers a pattern ending in "%f"
			if p >= len(ms.pattern) || ms.pattern[p] != '[' {
				return INVALID, .Invalid_Pattern_Capture
			}

			ep := class_end(ms, p) or_return
			// both stay 0 at the respective edge of the source
			previous, current: rune

			// get previous
			if s != 0 {
				temp := utf8_prev(ms.src, 0, s)
				previous, _ = utf8_peek(ms.src[temp:]) or_return
			}

			// get current
			if s != len(ms.src) {
				current, _ = utf8_peek(ms.src[s:]) or_return
			}

			m1 := match_bracket_class(ms, previous, p, ep - 1) or_return
			m2 := match_bracket_class(ms, current, p, ep - 1) or_return

			// frontier: previous character outside the set, current one inside
			if !m1 && m2 {
				return match(ms, s, ep)
			}

			s = INVALID

		// back-reference to a capture; the range is inclusive so that %9 is covered
		case '0'..='9':
			s = match_capture(ms, s, rune(ms.pattern[p + 1])) or_return

			if s != INVALID {
				return match(ms, s, p + 2)
			}

		case: return match_default(ms, s, p)
		}

	case:
		return match_default(ms, s, p)
	}

	return s, .OK
}
|
||||
|
||||
// Match a single pattern item (character, escape or set) starting at `p`, honouring
// an optional quantifier (`?`, `+`, `*`, `-`) that directly follows the item.
// Returns the source index just past the match, or INVALID when there is no match.
match_default :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
	ep := class_end(ms, p) or_return
	matched, width := single_match(ms, s, p, ep) or_return

	// byte following the item, 0 when the pattern ends there
	quantifier: u8
	if ep < len(ms.pattern) {
		quantifier = ms.pattern[ep]
	}

	if !matched {
		// the item may legally match zero times with these quantifiers
		switch quantifier {
		case '*', '?', '-': return match(ms, s, ep + 1)
		}
		return INVALID, .OK
	}

	switch quantifier {
	case '?':
		// prefer consuming the optional character, fall back to skipping it
		with_item := match(ms, s + width, ep + 1) or_return

		if with_item == INVALID {
			return match(ms, s, ep + 1)
		}
		return with_item, .OK

	case '+':
		// one occurrence is already consumed, the rest behaves like '*'
		result := max_expand(ms, s + width, p, ep) or_return
		return result, .OK

	case '*':
		result := max_expand(ms, s, p, ep) or_return
		return result, .OK

	case '-':
		result := min_expand(ms, s, p, ep) or_return
		return result, .OK
	}

	// no quantifier: consume the character and continue with the rest of the pattern
	return match(ms, s + width, ep)
}
|
||||
|
||||
// Store capture `i` as a byte range in `matches[i]`.
// When `i` is not a recorded capture, only index 0 is accepted and is written as
// the range { 0, e - s }; any other index yields .Invalid_Capture_Index.
push_onecapture :: proc(ms: ^Match_State, i: int, s: int, e: int, matches: []Match) -> (err: Error) {
	if i < ms.level {
		capture := ms.capture[i]

		switch capture.len {
		case CAP_UNFINISHED: err = .Unfinished_Capture
		// position captures are written as a one byte wide range
		case CAP_POSITION:   matches[i] = { capture.init, capture.init + 1 }
		case:                matches[i] = { capture.init, capture.init + capture.len }
		}

		return
	}

	if i != 0 {
		err = .Invalid_Capture_Index
		return
	}

	matches[0] = { 0, e - s }
	return
}
|
||||
|
||||
// Write every recorded capture into `matches` and return how many were written.
// With no recorded captures and `s != -1` a single entry is written instead
// (see push_onecapture for its contents).
push_captures :: proc(
	ms: ^Match_State,
	s: int,
	e: int,
	matches: []Match,
) -> (nlevels: int, err: Error) {
	nlevels = ms.level
	if ms.level == 0 && s != -1 {
		nlevels = 1
	}

	for i := 0; i < nlevels; i += 1 {
		push_onecapture(ms, i, s, e, matches) or_return
	}

	return
}
|
||||
|
||||
// SPECIALS := "^$*+?.([%-"
// lookup table indexed by byte value: true for every special pattern character
SPECIALS_TABLE := [256]bool {
	'^' = true,
	'$' = true,
	'*' = true,
	'+' = true,
	'?' = true,
	'.' = true,
	'(' = true,
	'[' = true,
	'%' = true,
	'-' = true,
}

// Return the byte index of the first special pattern character in `text`,
// or -1 when `text` contains none.
index_special :: proc(text: string) -> int {
	for b, i in transmute([]byte)text {
		if SPECIALS_TABLE[b] {
			return i
		}
	}

	return -1
}
|
||||
|
||||
// Plain substring search: byte index of the first occurrence of `s2` in `s1`.
// Returns 0 for an empty `s2` and -1 when `s2` does not occur in `s1`.
lmem_find :: proc(s1, s2: string) -> int {
	// an empty needle matches at the very start
	if len(s2) == 0 {
		return 0
	}

	// a needle longer than the haystack can never match
	if len(s2) > len(s1) {
		return -1
	}

	// use the standard library search instead of a hand-rolled scan
	return strings.index(s1, s2)
}
|
||||
|
||||
// Find `pattern` within `haystack`, starting the search at byte index `offset`.
// `allow_memfind` speeds up simple searches: a pattern without special characters
// is then located with a plain substring search.
// On success `matches[0]` holds the byte range of the whole match, the following
// entries hold the captures, and `captures` is the number of entries written.
// `captures` is 0 when nothing was found.
// Returns .OOB when `offset` lies outside of the haystack and .Match_Invalid
// (with the entries still filled in) for a match that does not advance.
find_aux :: proc(
	haystack: string,
	pattern: string,
	offset: int,
	allow_memfind: bool,
	matches: ^[MAX_CAPTURES]Match,
) -> (captures: int, err: Error) {
	// reject offsets outside of the haystack instead of slicing out of bounds
	if offset < 0 || offset > len(haystack) {
		err = .OOB
		return
	}

	s := offset
	p := 0

	// plain text pattern: no need to run the matcher
	if allow_memfind && index_special(pattern) == -1 {
		if index := lmem_find(haystack[s:], pattern); index != -1 {
			matches[0] = { index + s, index + s + len(pattern) }
			captures = 1
		}

		return
	}

	// a leading '^' anchors the match to the start position
	pattern := pattern
	anchor: bool
	if len(pattern) > 0 && pattern[0] == '^' {
		anchor = true
		pattern = pattern[1:]
	}

	ms := Match_State {
		src = haystack,
		pattern = pattern,
	}

	for {
		res := match(&ms, s, p) or_return

		if res != INVALID {
			// disallow non advancing match
			if s == res {
				err = .Match_Invalid
			}

			// NOTE(Skytrias): first result is reserved for a full match
			matches[0] = { s, res }

			// rest are the actual captures
			captures = push_captures(&ms, -1, -1, matches[1:]) or_return
			captures += 1

			return
		}

		// anchored patterns are only tried at the start position
		if anchor {
			break
		}

		// retry at the next character: advance by the whole rune so that the
		// matcher never starts inside a multi-byte sequence, which would decode
		// as a rune error and abort the search
		_, width := utf8.decode_rune_in_string(ms.src[s:])
		s += max(width, 1)

		if s >= len(ms.src) {
			break
		}
	}

	return
}
|
||||
|
||||
// Iterative matching: returns the first capture of the next match, or the whole
// match when the pattern has no captures, and advances `haystack^` past that range.
// The remaining captures have to be read from `captures`.
// `ok` is false once the haystack is exhausted or nothing matches anymore.
gmatch :: proc(
	haystack: ^string,
	pattern: string,
	captures: ^[MAX_CAPTURES]Match,
) -> (res: string, ok: bool) {
	if len(haystack) == 0 {
		return
	}

	count, err := find_aux(haystack^, pattern, 0, false, captures)
	if count == 0 || err != .OK {
		return
	}

	// entry 0 is the whole match, entry 1 the first capture (if any)
	slot := 0
	if count > 1 {
		slot = 1
	}

	found := captures[slot]
	res = haystack[found.byte_start:found.byte_end]
	haystack^ = haystack[found.byte_end:]
	ok = true
	return
}
|
||||
|
||||
// gsub with builder: write `haystack` into `builder` with every match of `pattern`
// replaced by `replace`, and return the builder content as a string.
// Returns an empty string when a match reports an error.
gsub_builder :: proc(
	builder: ^strings.Builder,
	haystack: string,
	pattern: string,
	replace: string,
) -> string {
	captures: [MAX_CAPTURES]Match
	rest := haystack

	for {
		count, err := find_aux(rest, pattern, 0, false, &captures)

		// no further match: done
		if count == 0 {
			break
		}

		if err != .OK {
			return {}
		}

		whole := captures[0]

		// text in front of the match, followed by the replacement
		strings.write_string(builder, rest[:whole.byte_start])
		strings.write_string(builder, replace)

		// continue after the match
		rest = rest[whole.byte_end:]
	}

	// unmatched tail
	strings.write_string(builder, rest[:])
	return strings.to_string(builder^)
}
|
||||
|
||||
// Uses a temp builder to build the initial string, then allocates the result
// with `allocator`. The returned string is owned by the caller.
// Previously `allocator` was ignored and the returned string pointed into
// context.temp_allocator memory.
gsub_allocator :: proc(
	haystack: string,
	pattern: string,
	replace: string,
	allocator := context.allocator,
) -> string {
	builder := strings.builder_make(0, 256, context.temp_allocator)
	built := gsub_builder(&builder, haystack, pattern, replace)

	// copy out of the temporary buffer so the result outlives the temp allocator
	result := strings.clone(built, allocator)
	return result
}
|
||||
|
||||
// Callback signature used by gsub_with.
Gsub_Proc :: proc(
	// optional passed data
	data: rawptr,
	// word match found
	word: string,
	// current haystack for found captures
	haystack: string,
	// found captures - empty for no captures
	captures: []Match,
)

// Call `call` for every match of `pattern` in `haystack`.
// Capture ranges handed to the callback are relative to the haystack slice that
// is passed along with them. Iteration stops at the first error or when nothing
// matches anymore.
gsub_with :: proc(
	haystack: string,
	pattern: string,
	data: rawptr,
	call: Gsub_Proc,
) {
	captures: [MAX_CAPTURES]Match
	rest := haystack

	for {
		count, err := find_aux(rest, pattern, 0, false, &captures)

		// done
		if err != .OK || count == 0 {
			break
		}

		whole := captures[0]
		call(data, rest[whole.byte_start:whole.byte_end], rest, captures[1:count])

		// continue after the match
		rest = rest[whole.byte_end:]
	}
}
|
||||
|
||||
gsub :: proc { gsub_builder, gsub_allocator }
|
||||
|
||||
// Iterative find that only returns the whole match (capture 0) and advances
// `haystack^` past it. Plain text patterns take the substring search fast path.
// `ok` is false once the haystack is exhausted or nothing matches anymore.
gfind :: proc(
	haystack: ^string,
	pattern: string,
	captures: ^[MAX_CAPTURES]Match,
) -> (res: string, ok: bool) {
	if len(haystack) == 0 {
		return
	}

	count, err := find_aux(haystack^, pattern, 0, true, captures)
	if count == 0 || err != .OK {
		return
	}

	whole := captures[0]
	res = haystack[whole.byte_start:whole.byte_end]
	haystack^ = haystack[whole.byte_end:]
	ok = true
	return
}
|
||||
|
||||
// Rebuild `pattern` into a case insensitive pattern: every alphabetic rune that does
// not directly follow the escape character is written as a two element set holding
// its lower and upper case form (e.g. "a" becomes "[aA]"). Everything else is copied.
// NOTE(review): letters that are already inside a `[...]` set are wrapped as well - confirm intended.
pattern_case_insensitive_builder :: proc(
	builder: ^strings.Builder,
	pattern: string,
) -> (res: string) {
	rest := pattern
	escaped := false // previous rune was the escape character

	for len(rest) > 0 {
		char, size := utf8.decode_rune_in_string(rest)
		rest = rest[size:]

		if escaped || !unicode.is_alpha(char) {
			// class letters after '%' and non letters are copied verbatim
			strings.write_rune(builder, char)
		} else {
			// write character class in manually
			strings.write_byte(builder, '[')
			strings.write_rune(builder, unicode.to_lower(char))
			strings.write_rune(builder, unicode.to_upper(char))
			strings.write_byte(builder, ']')
		}

		escaped = char == L_ESC
	}

	return strings.to_string(builder^)
}
|
||||
|
||||
// Build the case insensitive form of `pattern` in a temporary builder with an initial
// capacity of `cap` bytes, then allocate the result with `allocator`.
// The returned string is owned by the caller.
// Previously `allocator` was ignored and the returned string pointed into
// context.temp_allocator memory.
pattern_case_insensitive_allocator :: proc(
	pattern: string,
	cap: int = 256,
	allocator := context.allocator,
) -> (res: string) {
	builder := strings.builder_make(0, cap, context.temp_allocator)
	built := pattern_case_insensitive_builder(&builder, pattern)

	// copy out of the temporary buffer so the result outlives the temp allocator
	res = strings.clone(built, allocator)
	return
}
|
||||
|
||||
pattern_case_insensitive :: proc { pattern_case_insensitive_builder, pattern_case_insensitive_allocator }
|
||||
|
||||
// Matcher helper struct that stores optional data you might want to use or not.
// As lua is far more dynamic this helps dealing with too much data;
// it also allows use of find/match/gmatch through one struct.
Matcher :: struct {
	haystack:        string,
	pattern:         string,
	captures:        [MAX_CAPTURES]Match, // entry 0 is the whole match
	captures_length: int,                 // number of valid entries in `captures`
	offset:          int,                 // byte offset the search starts at
	err:             Error,               // error of the last search

	// changing content for iterators
	iter:       string, // part of the haystack that is still to be iterated
	iter_index: int,    // number of matches handed out by the iterator so far
}

// Init using haystack & pattern and an optional byte offset.
matcher_init :: proc(haystack, pattern: string, offset: int = 0) -> (res: Matcher) {
	return Matcher {
		haystack = haystack,
		pattern  = pattern,
		offset   = offset,
		iter     = haystack,
	}
}
|
||||
|
||||
// Find the first match and return its byte start / end position in the haystack,
// `ok` is true on success. Capture count and error are stored in the matcher.
// `start` / `end` are read from captures[0] even when `ok` is false.
matcher_find :: proc(matcher: ^Matcher) -> (start, end: int, ok: bool) #no_bounds_check {
	count, err := find_aux(
		matcher.haystack,
		matcher.pattern,
		matcher.offset,
		true,
		&matcher.captures,
	)
	matcher.captures_length = count
	matcher.err = err

	whole := matcher.captures[0]
	return whole.byte_start, whole.byte_end, count > 0 && err == .OK
}
|
||||
|
||||
// Find the first match and return the matched word, `ok` is true on success.
// Capture count and error are stored in the matcher.
// `word` is sliced from captures[0] even when `ok` is false.
matcher_match :: proc(matcher: ^Matcher) -> (word: string, ok: bool) #no_bounds_check {
	count, err := find_aux(
		matcher.haystack,
		matcher.pattern,
		matcher.offset,
		false,
		&matcher.captures,
	)
	matcher.captures_length = count
	matcher.err = err

	whole := matcher.captures[0]
	return matcher.haystack[whole.byte_start:whole.byte_end], count > 0 && err == .OK
}
|
||||
|
||||
// Get the capture text at the "correct" spot: `index` 0 is the first capture,
// as spot 0 of the captures array is reserved for the whole match.
// Bounds are checked manually against the size of the captures array and
// reported at the caller location.
matcher_capture :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> string #no_bounds_check {
	// the array holds MAX_CAPTURES entries, so the slot has to be < MAX_CAPTURES
	// (the previous count of MAX_CAPTURES - 1 rejected the valid last slot)
	runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES)
	cap := matcher.captures[index + 1]
	return matcher.haystack[cap.byte_start:cap.byte_end]
}
|
||||
|
||||
// Get the raw byte range of a capture, skipping spot 0 (the whole match):
// `index` 0 is the first capture.
// Bounds are checked manually against the size of the captures array and
// reported at the caller location.
matcher_capture_raw :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> Match #no_bounds_check {
	// the array holds MAX_CAPTURES entries, so the slot has to be < MAX_CAPTURES
	// (the previous count of MAX_CAPTURES - 1 rejected the valid last slot)
	runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES)
	return matcher.captures[index + 1]
}
|
||||
|
||||
// alias
|
||||
matcher_gmatch :: matcher_match_iter
|
||||
|
||||
// Iteratively match the remaining haystack until no more matches are found.
// Returns the first capture of the next match (or the whole match when the pattern
// has no captures) together with the zero based iteration index.
// NOTE(review): `matcher.offset` is applied to the already shortened `iter` on every call - confirm intended.
matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: bool) {
	if len(matcher.iter) == 0 {
		return
	}

	count, err := find_aux(
		matcher.iter,
		matcher.pattern,
		matcher.offset,
		false,
		&matcher.captures,
	)
	matcher.captures_length = count
	matcher.err = err

	if count == 0 || err != .OK {
		return
	}

	// entry 0 is the whole match, entry 1 the first capture (if any)
	slot := 1 if count > 1 else 0
	found := matcher.captures[slot]

	// output
	res = matcher.iter[found.byte_start:found.byte_end]
	index = matcher.iter_index
	ok = true

	// advance
	matcher.iter_index += 1
	matcher.iter = matcher.iter[found.byte_end:]
	return
}
|
||||
|
||||
// Get a slice of all valid captures above the first (whole) match.
// Returns nil when nothing has been matched yet: `captures_length` is 0 then and
// slicing `captures[1:0]` would be out of range.
matcher_captures_slice :: proc(matcher: ^Matcher) -> []Match {
	if matcher.captures_length < 1 {
		return nil
	}

	return matcher.captures[1:matcher.captures_length]
}
|
||||
@@ -2678,8 +2678,8 @@ gb_internal bool are_types_identical_internal(Type *x, Type *y, bool check_tuple
|
||||
x->Proc.variadic == y->Proc.variadic &&
|
||||
x->Proc.diverging == y->Proc.diverging &&
|
||||
x->Proc.optional_ok == y->Proc.optional_ok &&
|
||||
are_types_identical(x->Proc.params, y->Proc.params) &&
|
||||
are_types_identical(x->Proc.results, y->Proc.results);
|
||||
are_types_identical_internal(x->Proc.params, y->Proc.params, check_tuple_names) &&
|
||||
are_types_identical_internal(x->Proc.results, y->Proc.results, check_tuple_names);
|
||||
|
||||
case Type_Map:
|
||||
return are_types_identical(x->Map.key, y->Map.key) &&
|
||||
|
||||
@@ -2,7 +2,7 @@ ODIN=../../odin
|
||||
PYTHON=$(shell which python3)
|
||||
|
||||
all: download_test_assets image_test compress_test strings_test hash_test crypto_test noise_test encoding_test \
|
||||
math_test linalg_glsl_math_test filepath_test reflect_test os_exit_test i18n_test c_libc_test
|
||||
math_test linalg_glsl_math_test filepath_test reflect_test os_exit_test i18n_test match_test c_libc_test
|
||||
|
||||
download_test_assets:
|
||||
$(PYTHON) download_assets.py
|
||||
@@ -49,5 +49,8 @@ os_exit_test:
|
||||
i18n_test:
|
||||
$(ODIN) run text/i18n -out:test_core_i18n
|
||||
|
||||
match_test:
|
||||
$(ODIN) run text/match -out:test_core_match
|
||||
|
||||
c_libc_test:
|
||||
$(ODIN) run c/libc -out:test_core_libc
|
||||
$(ODIN) run c/libc -out:test_core_libc
|
||||
|
||||
@@ -71,6 +71,11 @@ echo Running core:text/i18n tests
|
||||
echo ---
|
||||
%PATH_TO_ODIN% run text\i18n %COMMON% -out:test_core_i18n.exe
|
||||
|
||||
echo ---
echo Running core:text/match tests
echo ---
rem The test package lives in tests\core\text\match (the Makefile target runs text/match as well).
%PATH_TO_ODIN% run text\match %COMMON% -out:test_core_match.exe
|
||||
|
||||
echo ---
|
||||
echo Running core:slice tests
|
||||
echo ---
|
||||
|
||||
396
tests/core/text/match/test_core_text_match.odin
Normal file
396
tests/core/text/match/test_core_text_match.odin
Normal file
@@ -0,0 +1,396 @@
|
||||
package test_strlib
|
||||
|
||||
import "core:text/match"
|
||||
import "core:testing"
|
||||
import "core:fmt"
|
||||
import "core:os"
|
||||
import "core:io"
|
||||
|
||||
TEST_count: int
|
||||
TEST_fail: int
|
||||
|
||||
// Records one check and reports whether it FAILED.
//
// Every call bumps TEST_count. When `ok` is false the caller location is
// written to the test writer as a "<loc>: " prefix (the caller is expected to
// log the details next), and both the per-test and global failure counters
// are incremented.
//
// Returns true when the check failed, so it can be used as
// `if failed(t, cond) { logf(...) }`.
failed :: proc(t: ^testing.T, ok: bool, loc := #caller_location) -> bool {
	TEST_count += 1

	if ok {
		return false
	}

	fmt.wprintf(t.w, "%v: ", loc)
	t.error_count += 1
	TEST_fail += 1
	return true
}
|
||||
|
||||
expect :: testing.expect
|
||||
|
||||
// Formats `args` according to `format` and writes the result to the test's
// writer (`t.w`). No newline is appended.
logf :: proc(t: ^testing.T, format: string, args: ..any) {
	out := t.w
	fmt.wprintf(out, format, ..args)
}
|
||||
|
||||
// Verifies that matcher_find reports the expected byte offsets (and success
// flag) for a table of haystack/pattern pairs.
@test
test_find :: proc(t: ^testing.T) {
	Case :: struct {
		s, p:   string, // haystack and pattern
		offset: int,    // offset handed to matcher_init

		// expected result of matcher_find
		want: struct {
			start, end: int,
			ok:         bool,
		},
	}

	CASES :: [?]Case {
		{ "", "", 0, { 0, 0, true } },
		{ "alo", "", 0, { 0, 0, true } },
		{ "a o a o a o", "a", 0, { 0, 1, true } },
		{ "a o a o a o", "a o", 1, { 4, 7, true } },
		{ "alo123alo", "12", 0, { 3, 5, true } },
		{ "alo123alo", "^12", 0, {} },

		// from https://riptutorial.com/lua/example/20535/string-find--introduction-
		{ "137'5 m47ch s0m3 d1g175", "m%d%d", 0, { 6, 9, true } },
		{ "stack overflow", "[abc]", 0, { 2, 3, true } },
		{ "stack overflow", "[^stack ]", 0, { 6, 7, true } },
		{ "hello", "o%d?", 0, { 4, 5, true } },
		{ "hello20", "o%d?", 0, { 4, 6, true } },
		{ "helllllo", "el+", 0, { 1, 7, true } },
		{ "heo", "el+", 0, {} },
		{ "helelo", "h.+l", 0, { 0, 5, true } },
		{ "helelo", "h.-l", 0, { 0, 3, true } },
	}

	for c, i in CASES {
		m := match.matcher_init(c.s, c.p, c.offset)
		start, end, ok := match.matcher_find(&m)

		// Any single deviation from the expected triple is a failure.
		mismatch := ok != c.want.ok || start != c.want.start || end != c.want.end

		if failed(t, !mismatch) {
			logf(t, "Find %d failed!\n", i)
			logf(t, "\tHAYSTACK %s\tPATTERN %s\n", c.s, c.p)
			logf(t, "\tSTART: %d == %d?\n", c.want.start, start)
			logf(t, "\tEND: %d == %d?\n", c.want.end, end)
			logf(t, "\tErr: %v\tLength %d\n", m.err, m.captures_length)
		}
	}
}
|
||||
|
||||
// Verifies that matcher_match returns the expected matched substring (and
// success flag) for quantifiers, character classes, anchors and %b.
@test
test_match :: proc(t: ^testing.T) {
	Case :: struct {
		s, p:   string, // haystack and pattern
		result: string, // expected matched text ("" when no match is expected)
		ok:     bool,   // expected success flag
	}

	CASES :: [?]Case {
		// star
		{ "aaab", ".*b", "aaab", true },
		{ "aaa", ".*a", "aaa", true },
		{ "b", ".*b", "b", true },

		// plus
		{ "aaab", ".+b", "aaab", true },
		{ "aaa", ".+a", "aaa", true },
		{ "b", ".+b", "", false },

		// question
		{ "aaab", ".?b", "ab", true },
		{ "aaa", ".?a", "aa", true },
		{ "b", ".?b", "b", true },

		// classes: a lone `%` is a truncated, invalid pattern and must not match
		{ "a", "%", "", false },

		// %a letter (A-Z, a-z)
		{ "letterS", "%a+", "letterS", true },
		{ "Let123", "%a+", "Let", true },
		{ "Let123", "%A+", "123", true },

		// %c control characters (\n, \t, \r)
		{ "\n", "%c", "\n", true },
		{ "\t", "%c", "\t", true },
		{ "\t", "%C", "", false },
		{ "a", "%C", "a", true },

		// %d digit characters (0-9)
		{ "0123", "%d+", "0123", true },
		{ "abcd", "%D+", "abcd", true },
		{ "ab23", "%d+", "23", true },

		// %l lower characters (a-z)
		{ "lowerCASE", "%l+", "lower", true },
		{ "LOWERcase", "%l+", "case", true },
		{ "LOWERcase", "%L+", "LOWER", true },

		// %p punctuation characters (!, ?, &, ...)
		{ "!?&", "%p+", "!?&", true },
		{ "abc!abc", "%p", "!", true },
		{ "!abc!", "%P+", "abc", true },

		// %s space characters
		{ " ", "%s", " ", true },
		{ "a", "%S", "a", true },
		{ "abc ", "%s+", " ", true },

		// %u upper characters (A-Z)
		{ "lowerCASE", "%u+", "CASE", true },
		{ "LOWERcase", "%u+", "LOWER", true },
		{ "LOWERcase", "%U+", "case", true },

		// %w alpha numeric (A-Z, a-z, 0-9)
		{ "0123", "%w+", "0123", true },
		{ "abcd", "%W+", "", false },
		{ "ab23", "%w+", "ab23", true },

		// %x hexadecimal digits (0x1A, ...)
		{ "3", "%x", "3", true },
		{ "9f", "%x+", "9f", true },
		{ "9g", "%x+", "9", true },
		{ "9g", "%X+", "g", true },

		// random tests
		{ "f123", "%D", "f", true },
		{ "f123", "%d", "1", true },
		{ "f123", "%d+", "123", true },
		{ "foo 123 bar", "%d%d%d", "123", true },
		{ "Uppercase", "%u", "U", true },
		{ "abcd", "[bc][bc]", "bc", true },
		{ "abcd", "[^ad]", "b", true },
		{ "123", "[0-9]", "1", true },

		// end of line
		{ "testing this", "this$", "this", true },
		{ "testing this ", "this$", "", false },
		{ "testing this$", "this%$$", "this$", true },

		// start of line
		{ "testing this", "^testing", "testing", true },
		{ " testing this", "^testing", "", false },
		{ "testing this", "^%w+", "testing", true },
		{ " testing this", "^%w+", "", false },

		// balanced string %b
		{ "testing (this) out", "%b()", "(this)", true },
		{ "testing athisz out", "%baz", "athisz", true },
		{ "testing _this_ out", "%b__", "_this_", true },
		{ "testing _this_ out", "%b_", "", false },
	}

	for c, i in CASES {
		m := match.matcher_init(c.s, c.p)
		got, ok := match.matcher_match(&m)

		// Both the success flag and the matched text have to agree.
		mismatch := ok != c.ok || got != c.result

		if failed(t, !mismatch) {
			logf(t, "Match %d failed!\n", i)
			logf(t, "\tHAYSTACK %s\tPATTERN %s\n", c.s, c.p)
			logf(t, "\tResults: WANTED %s\tGOT %s\n", c.result, got)
			logf(t, "\tErr: %v\tLength %d\n", m.err, m.captures_length)
		}
	}
}
|
||||
|
||||
// Exercises find_aux directly: checks the captured substrings, whether a
// pattern matches at all, and a pattern with several capture groups.
@test
test_captures :: proc(t: ^testing.T) {
	Temp :: struct {
		pattern:  string,
		captures: [match.MAX_CAPTURES]match.Match,
	}

	// Runs `test.pattern` against `haystack` and compares every capture with
	// `comp` (index 0 is the whole match, the rest are the capture groups).
	// Failures are attributed to the caller through `loc`.
	compare_captures :: proc(t: ^testing.T, test: ^Temp, haystack: string, comp: []string, loc := #caller_location) {
		length, _ := match.find_aux(haystack, test.pattern, 0, false, &test.captures)
		if failed(t, len(comp) == length, loc) {
			logf(t, "Captures Compare Failed -> Lengths %d != %d\n", len(comp), length)
		}

		// Only compare the entries both sides have, so a length mismatch is
		// reported above instead of indexing `comp` out of bounds here.
		for i in 0..<min(length, len(comp)) {
			cap := test.captures[i]
			text := haystack[cap.byte_start:cap.byte_end]

			if failed(t, comp[i] == text, loc) {
				logf(t, "Capture don't equal -> %s != %s\n", comp[i], text)
			}
		}
	}

	// Checks that `test.pattern` matches (`ok == true`) or does not match
	// (`ok == false`) `haystack`. Failures are attributed to the caller
	// through `loc`.
	matches :: proc(t: ^testing.T, test: ^Temp, haystack: string, ok: bool, loc := #caller_location) {
		length, err := match.find_aux(haystack, test.pattern, 0, false, &test.captures)
		result := length > 0 && err == .OK

		if failed(t, result == ok, loc) {
			logf(t, "Capture match failed!\n")
			logf(t, "\tErr: %v\n", err)
			logf(t, "\tLength: %v\n", length)
		}
	}

	temp := Temp { pattern = "(one).+" }
	compare_captures(t, &temp, " one two", { "one two", "one" })
	compare_captures(t, &temp, "three", {})

	matches(t, &temp, "one dog", true)
	matches(t, &temp, "dog one ", true)
	matches(t, &temp, "dog one", false)

	temp.pattern = "^(%a+)"
	matches(t, &temp, "one dog", true)
	matches(t, &temp, " one dog", false)

	// multiple captures
	{
		haystack := " 233 hello dolly"
		pattern := "%s*(%d+)%s+(%S+)"
		captures: [match.MAX_CAPTURES]match.Match
		match.find_aux(haystack, pattern, 0, false, &captures)
		cap1 := captures[1]
		cap2 := captures[2]
		text1 := haystack[cap1.byte_start:cap1.byte_end]
		text2 := haystack[cap2.byte_start:cap2.byte_end]
		expect(t, text1 == "233", "Multi-Capture failed at 1")
		expect(t, text2 == "hello", "Multi-Capture failed at 2")
	}
}
|
||||
|
||||
// Checks that the `index`-th produced match `b` equals the expected entry
// `a[index]`.
//
// An `index` outside of `a` (the matcher produced more matches than expected)
// is reported as a failed check instead of indexing out of bounds.
gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) {
	in_range := 0 <= index && index < len(a)

	if failed(t, in_range && a[index] == b) {
		logf(t, "GMATCH %d failed!\n", index)
		if in_range {
			logf(t, "\t%s != %s\n", a[index], b)
		} else {
			logf(t, "\tunexpected match %s, only %d expected\n", b, len(a))
		}
	}
}
|
||||
|
||||
// Iterates matcher_gmatch over several haystacks and checks both the content
// and the NUMBER of the produced matches.
@test
test_gmatch :: proc(t: ^testing.T) {
	// Runs `pattern` over `haystack` and compares every produced match with
	// `expected`. The count is verified as well: otherwise a matcher that
	// yields nothing would pass without a single check being executed.
	check_all :: proc(t: ^testing.T, haystack, pattern: string, expected: []string) {
		matcher := match.matcher_init(haystack, pattern)
		found := 0

		// `word` rather than `match`, so the package name is not shadowed.
		for word, index in match.matcher_gmatch(&matcher) {
			gmatch_check(t, index, expected, word)
			found += 1
		}

		if failed(t, found == len(expected)) {
			logf(t, "GMATCH count failed!\n")
			logf(t, "\tPATTERN %s\tWANTED %d\tGOT %d\n", pattern, len(expected), found)
		}
	}

	check_all(t, "testing this out 123", "%w+", { "testing", "this", "out", "123" })
	check_all(t, "#afdde6", "%x%x", { "af", "dd", "e6" })
	check_all(t, "testing outz captures yo outz outtz", "(out)z", { "out", "out" })
}
|
||||
|
||||
// Checks global substitution with a digit pattern and with a letter pattern.
// Results are allocated with the temporary allocator.
@test
test_gsub :: proc(t: ^testing.T) {
	digits_replaced := match.gsub("testing123testing", "%d+", " sup ", context.temp_allocator)
	expect(t, digits_replaced == "testing sup testing", "GSUB 0: failed")

	letters_replaced := match.gsub("testing123testing", "%a+", "345", context.temp_allocator)
	expect(t, letters_replaced == "345123345", "GSUB 1: failed")
}
|
||||
|
||||
// Iterates gfind over a haystack and checks every produced word as well as
// the total number of words.
@test
test_gfind :: proc(t: ^testing.T) {
	haystack := "test1 123 test2 123 test3"
	pattern := "%w+"
	captures: [match.MAX_CAPTURES]match.Match
	s := &haystack
	output := [?]string { "test1", "123", "test2", "123", "test3" }
	index: int

	for word in match.gfind(s, pattern, &captures) {
		// A surplus word is a failure, not an out-of-bounds access.
		in_range := index < len(output)

		if failed(t, in_range && output[index] == word) {
			logf(t, "GFIND %d failed!\n", index)
			if in_range {
				logf(t, "\t%s != %s\n", output[index], word)
			} else {
				logf(t, "\tunexpected word %s, only %d expected\n", word, len(output))
			}
		}
		index += 1
	}

	// Without this, an iterator that yields nothing would pass silently.
	if failed(t, index == len(output)) {
		logf(t, "GFIND count failed!\n")
		logf(t, "\tWANTED %d\tGOT %d\n", len(output), index)
	}
}
|
||||
|
||||
// Checks the frontier pattern (%f) through gsub_with: the callback has to be
// invoked once per expected word, in order.
@test
test_frontier :: proc(t: ^testing.T) {
	Temp :: struct {
		t:      ^testing.T,
		index:  int,       // number of callback invocations so far
		output: [3]string, // expected words, in order
	}

	// Callback handed to gsub_with; `data` is the ^Temp set up below.
	call :: proc(data: rawptr, word: string, haystack: string, captures: []match.Match) {
		temp := cast(^Temp) data

		// A surplus invocation is a failure, not an out-of-bounds access.
		in_range := temp.index < len(temp.output)

		if failed(temp.t, in_range && word == temp.output[temp.index]) {
			logf(temp.t, "GSUB_WITH %d failed!\n", temp.index)
			if in_range {
				logf(temp.t, "\t%s != %s\n", temp.output[temp.index], word)
			} else {
				logf(temp.t, "\tunexpected word %s, only %d expected\n", word, len(temp.output))
			}
		}

		temp.index += 1
	}

	temp := Temp {
		t = t,
		output = {
			"THE",
			"QUICK",
			"JUMPS",
		},
	}

	// https://lua-users.org/wiki/FrontierPattern example taken from here
	match.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call)

	// Without this, a callback that is never invoked would pass silently.
	if failed(t, temp.index == len(temp.output)) {
		logf(t, "GSUB_WITH count failed!\n")
		logf(t, "\tWANTED %d\tGOT %d\n", len(temp.output), temp.index)
	}
}
|
||||
|
||||
// Checks that %w+ splits a multi-byte UTF-8 haystack into the expected words
// and that exactly that many words are produced.
@test
test_utf8 :: proc(t: ^testing.T) {
	matcher := match.matcher_init("恥ず べき恥 フク恥ロ", "%w+")
	output := [?]string { "恥ず", "べき恥", "フク恥ロ" }
	found := 0

	// `word` rather than `match`, so the package name is not shadowed.
	for word, index in match.matcher_gmatch(&matcher) {
		gmatch_check(t, index, output[:], word)
		found += 1
	}

	// Without this, a matcher that yields nothing would pass silently.
	if failed(t, found == len(output)) {
		logf(t, "UTF8 count failed!\n")
		logf(t, "\tWANTED %d\tGOT %d\n", len(output), found)
	}
}
|
||||
|
||||
// Checks that pattern_case_insensitive expands every letter of "test" into a
// two-character class. The pattern is allocated with the temporary allocator.
@test
test_case_insensitive :: proc(t: ^testing.T) {
	goal := "[tT][eE][sS][tT]"
	pattern := match.pattern_case_insensitive("test", 256, context.temp_allocator)

	if !failed(t, pattern == goal) {
		return
	}

	logf(t, "Case Insensitive Pattern doesn't match result\n")
	logf(t, "\t%s != %s\n", pattern, goal)
}
|
||||
|
||||
// Stand-alone entry point: runs every test of this package against a
// testing.T that writes to stdout, prints a summary line and exits with
// status 1 when at least one check failed.
main :: proc() {
	Test_Proc :: proc(t: ^testing.T)

	t: testing.T
	t.w = io.to_writer(os.stream_from_handle(os.stdout))

	// Executed in declaration order.
	tests := [?]Test_Proc {
		test_find,
		test_match,
		test_captures,
		test_gmatch,
		test_gsub,
		test_gfind,
		test_frontier,
		test_utf8,
		test_case_insensitive,
	}
	for run in tests {
		run(&t)
	}

	passed := TEST_count - TEST_fail
	fmt.wprintf(t.w, "%v/%v tests successful.\n", passed, TEST_count)
	if TEST_fail > 0 {
		os.exit(1)
	}
}
|
||||
Reference in New Issue
Block a user