Merge branch 'master' into compiler-improvements-2022-12

This commit is contained in:
gingerBill
2023-01-01 13:10:49 +00:00
committed by GitHub
13 changed files with 1404 additions and 22 deletions

View File

@@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Close Stale Issues
uses: actions/stale@v4.1.0
uses: actions/stale@v7.0.0
with:
# stale-issue-message: |
# Hello!
@@ -36,7 +36,7 @@ jobs:
# The motivation for this automation is to help prioritize issues in the backlog and not ignore, reject, or belittle anyone..
days-before-stale: 120
days-before-close: 30
days-before-close: -1
exempt-draft-pr: true
ascending: true
operations-per-run: 1000

View File

@@ -153,7 +153,7 @@ scratch_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode,
s := (^Scratch_Allocator)(allocator_data)
if s.data == nil {
DEFAULT_BACKING_SIZE :: 1<<22
DEFAULT_BACKING_SIZE :: 4 * Megabyte
if !(context.allocator.procedure != scratch_allocator_proc &&
context.allocator.data != allocator_data) {
panic("cyclic initialization of the scratch allocator with itself")

View File

@@ -3,11 +3,11 @@ package mem
import "core:runtime"
import "core:intrinsics"
Byte :: 1
Kilobyte :: 1024 * Byte
Megabyte :: 1024 * Kilobyte
Gigabyte :: 1024 * Megabyte
Terabyte :: 1024 * Gigabyte
Byte :: runtime.Byte
Kilobyte :: runtime.Kilobyte
Megabyte :: runtime.Megabyte
Gigabyte :: runtime.Gigabyte
Terabyte :: runtime.Terabyte
set :: proc "contextless" (data: rawptr, value: byte, len: int) -> rawptr {
return runtime.memset(data, i32(value), len)

View File

@@ -19,11 +19,11 @@ Arena :: struct {
// 1 MiB should be enough to start with
DEFAULT_ARENA_STATIC_COMMIT_SIZE :: 1<<20
DEFAULT_ARENA_STATIC_COMMIT_SIZE :: mem.Megabyte
DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE :: DEFAULT_ARENA_STATIC_COMMIT_SIZE
// 1 GiB on 64-bit systems, 128 MiB on 32-bit systems by default
DEFAULT_ARENA_STATIC_RESERVE_SIZE :: 1<<30 when size_of(uintptr) == 8 else 1<<27
DEFAULT_ARENA_STATIC_RESERVE_SIZE :: mem.Gigabyte when size_of(uintptr) == 8 else 128 * mem.Megabyte

View File

@@ -14,11 +14,12 @@ read_dir :: proc(fd: Handle, n: int, allocator := context.allocator) -> (fi: []F
dirpath: string
dirpath, err = absolute_path_from_handle(fd)
if err != ERROR_NONE {
return
}
defer delete(dirpath)
n := n
size := n
if n <= 0 {

View File

@@ -342,21 +342,33 @@ get_last_error_string :: proc() -> string {
}
open :: proc(path: string, flags: int = O_RDWR, mode: int = 0) -> (Handle, Errno) {
isDir := is_dir_path(path)
flags := flags
if isDir {
/*
@INFO(Platin): To make it impossible to use the wrong flag for dir's
as you can't write to a dir only read which makes it fail to open
*/
flags = O_RDONLY
}
cstr := strings.clone_to_cstring(path, context.temp_allocator)
handle := _unix_open(cstr, i32(flags), u16(mode))
if handle == -1 {
return INVALID_HANDLE, 1
return INVALID_HANDLE, cast(Errno)get_last_error()
}
when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 {
if mode != 0 {
/*
@INFO(Platin): this is only done because O_CREATE for some reason fails to apply mode
should not happen if the handle is a directory
*/
if mode != 0 && !isDir {
err := fchmod(handle, cast(u16)mode)
if err != 0 {
_unix_close(handle)
return INVALID_HANDLE, 1
return INVALID_HANDLE, cast(Errno)err
}
}
}
return handle, 0
}

View File

@@ -329,6 +329,12 @@ Allocator :: struct {
data: rawptr,
}
Byte :: 1
Kilobyte :: 1024 * Byte
Megabyte :: 1024 * Kilobyte
Gigabyte :: 1024 * Megabyte
Terabyte :: 1024 * Gigabyte
// Logging stuff
Logger_Level :: enum uint {

View File

@@ -1,6 +1,6 @@
package runtime
DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 1<<22)
DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 4 * Megabyte)
when ODIN_OS == .Freestanding || ODIN_OS == .JS || ODIN_DEFAULT_TO_NIL_ALLOCATOR {
@@ -197,4 +197,4 @@ default_temp_allocator :: proc(allocator: ^Default_Temp_Allocator) -> Allocator
procedure = default_temp_allocator_proc,
data = allocator,
}
}
}

959
core/text/match/strlib.odin Normal file
View File

@@ -0,0 +1,959 @@
package text_match
import "core:runtime"
import "core:unicode"
import "core:unicode/utf8"
import "core:strings"
// Number of capture slots: the size of Match_State.capture and of the
// Match arrays that callers pass to find_aux/gmatch/gfind.
MAX_CAPTURES :: 32
// Book-keeping for one capture group while a pattern is being matched.
Capture :: struct {
// byte index into the source string where the capture was opened
init: int,
// byte length of the capture, or one of the CAP_* sentinel values
len: int,
}
// Byte range handed back to callers for the whole match or for a capture.
// Code in this file slices the haystack as haystack[byte_start:byte_end].
Match :: struct {
byte_start, byte_end: int,
}
// Error codes produced by the matcher procedures in this package.
Error :: enum {
// no error
OK,
// the pattern ends directly after the escape character '%'
OOB,
// a capture index does not refer to a finished capture
Invalid_Capture_Index,
// ')' without an open capture, `%b` without two delimiters, or `%f` not followed by '['
Invalid_Pattern_Capture,
// a capture was still open when captures were collected
Unfinished_Capture,
// the pattern ends after '%' or inside an unterminated '[' set
Malformed_Pattern,
// UTF-8 decoding of the pattern or the source produced utf8.RUNE_ERROR
Rune_Error,
// find_aux found a match that did not advance past its start position
Match_Invalid,
}
// escape character that introduces classes (%a), back-references (%1), %b and %f
L_ESC :: '%'
// Capture.len sentinel: position capture "()"
CAP_POSITION :: -2
// Capture.len sentinel: capture opened by '(' but not yet closed by ')'
CAP_UNFINISHED :: -1
// source index returned by the match procedures to signal "no match"
INVALID :: -1
// State shared by the recursive matching procedures for one find_aux call.
Match_State :: struct {
// haystack being searched; all `s` arguments are byte indices into it
src: string,
// pattern being matched (find_aux strips a leading '^' before storing it)
pattern: string,
// number of capture slots currently in use
level: int,
// capture slots; entries below `level` are meaningful
capture: [MAX_CAPTURES]Capture,
}
// Reports whether rune `c` belongs to the character class named by `cl`
// (the rune that followed '%' in the pattern).
// A lower-case class letter selects the class itself, an upper-case letter
// selects its complement. Any rune that is not a class letter is compared
// literally against `c`.
match_class :: proc(c: rune, cl: rune) -> (res: bool) {
	in_class: bool

	switch unicode.to_lower(cl) {
	case 'a': in_class = is_alpha(c)
	case 'c': in_class = is_cntrl(c)
	case 'd': in_class = is_digit(c)
	case 'g': in_class = is_graph(c)
	case 'l': in_class = is_lower(c)
	case 'p': in_class = is_punct(c)
	case 's': in_class = is_space(c)
	case 'u': in_class = is_upper(c)
	case 'w': in_class = is_alnum(c)
	case 'x': in_class = is_xdigit(c)
	case:
		// not a class letter: escaped literal
		return cl == c
	}

	if is_lower(cl) {
		return in_class
	}
	return !in_class
}
// Character class predicates used by match_class; these are direct aliases
// of the corresponding core:unicode procedures.
is_alpha :: unicode.is_alpha
is_digit :: unicode.is_digit
is_lower :: unicode.is_lower
is_upper :: unicode.is_upper
is_punct :: unicode.is_punct
is_space :: unicode.is_space
is_cntrl :: unicode.is_control
// %w class: true for letters and digits.
is_alnum :: proc(c: rune) -> bool {
	if unicode.is_alpha(c) {
		return true
	}
	return unicode.is_digit(c)
}
// %g class: true for visible (printable, non-space) characters.
// The previous body was a copy of is_xdigit, so %g only matched hex digits.
is_graph :: proc(c: rune) -> bool {
	// everything up to and including ' ' is a control character or blank
	return c > ' ' && !unicode.is_control(c) && !unicode.is_space(c)
}
// %x class: true for the letters a-f / A-F and for anything unicode.is_digit accepts.
is_xdigit :: proc(c: rune) -> bool {
	switch c {
	case 'a'..='f', 'A'..='F':
		return true
	}
	return unicode.is_digit(c)
}
// Decode the first UTF-8 character of `bytes` and return it with its byte size.
// Returns .Rune_Error when decoding fails (empty input or an invalid sequence).
utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) {
	c, size = utf8.decode_rune_in_string(bytes)

	// A decode failure yields RUNE_ERROR with size 0 (empty) or 1 (invalid byte).
	// A correctly encoded U+FFFD has a larger size and is ordinary input, not an error.
	if c == utf8.RUNE_ERROR && size <= 1 {
		err = .Rune_Error
	}

	return
}
// Decode the UTF-8 character at bytes[index^:] and advance `index` past it.
// Returns .Rune_Error when decoding fails (nothing left or an invalid sequence);
// the index is still advanced by the size the decoder reported.
utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) {
	size: int
	c, size = utf8.decode_rune_in_string(bytes[index^:])

	// A decode failure yields RUNE_ERROR with size 0 or 1.
	// A correctly encoded U+FFFD has a larger size and must not be rejected.
	if c == utf8.RUNE_ERROR && size <= 1 {
		err = .Rune_Error
	}

	index^ += size
	return
}
// Reports whether `b` is a UTF-8 continuation byte, i.e. its two top bits are 10.
is_cont :: proc(b: byte) -> bool {
	return (b >> 6) == 0b10
}
// Step backwards from byte index `b` over continuation bytes and return the
// index of the byte before the position reached, never going below `a`.
utf8_prev :: proc(bytes: string, a, b: int) -> int {
	end := b

	for end > a && is_cont(bytes[end - 1]) {
		end -= 1
	}

	if end > a {
		return end - 1
	}
	return a
}
// Step forwards from byte index `a` over the continuation bytes that follow it
// and return the index after the position reached, clamped to len(bytes).
utf8_next :: proc(bytes: string, a: int) -> int {
	pos := a
	total := len(bytes)

	for pos < total - 1 && is_cont(bytes[pos + 1]) {
		pos += 1
	}

	if pos < total {
		return pos + 1
	}
	return total
}
// Translate the digit rune of a back-reference ('1' is the first capture) into
// a capture slot index. Fails with .Invalid_Capture_Index when the slot is out
// of range or the capture is still open.
check_capture :: proc(ms: ^Match_State, l: rune) -> (int, Error) {
	index := int(l - '1')

	// evaluation order matters: the slot is only read once the index is in range
	usable := index >= 0 && index < ms.level && ms.capture[index].len != CAP_UNFINISHED
	if !usable {
		return 0, .Invalid_Capture_Index
	}

	return index, .OK
}
// Find the most recently opened capture that has not been closed yet.
// Fails with .Invalid_Pattern_Capture when no capture is open.
capture_to_close :: proc(ms: ^Match_State) -> (int, Error) {
	for level := ms.level - 1; level >= 0; level -= 1 {
		if ms.capture[level].len == CAP_UNFINISHED {
			return level, .OK
		}
	}

	return 0, .Invalid_Pattern_Capture
}
// Return the pattern index just past the single pattern item starting at byte
// index `p`: a plain character, an escape such as `%a`, or a set such as `[a-z%]]`.
// Fails with .Malformed_Pattern when the pattern ends directly after '%' or
// inside an unterminated set.
class_end :: proc(ms: ^Match_State, p: int) -> (step: int, err: Error) {
	step = p
	ch := utf8_advance(ms.pattern, &step) or_return

	switch ch {
	case L_ESC:
		if step == len(ms.pattern) {
			err = .Malformed_Pattern
			return
		}

		// skip the escaped character
		utf8_advance(ms.pattern, &step) or_return

	case '[':
		// optional set negation
		if step + 1 < len(ms.pattern) && ms.pattern[step] == '^' {
			step += 1
		}

		// Scan byte-wise for the closing ']'. UTF-8 continuation bytes can
		// never equal ']' or '%', so walking bytes is safe here.
		for {
			if step == len(ms.pattern) {
				err = .Malformed_Pattern
				return
			}

			if ms.pattern[step] == ']' {
				break
			}

			if ms.pattern[step] == L_ESC {
				// Step over the '%' so that the escaped character (which may
				// be ']') is skipped below instead of being tested as the
				// end of the set.
				step += 1

				if step == len(ms.pattern) {
					err = .Malformed_Pattern
					return
				}
			}

			step += 1
		}

		// step past the closing ']'
		step += 1
	}

	return
}
// Test rune `c` against the set `[...]` whose '[' is at pattern index `p` and
// whose closing ']' is at pattern index `ec`.
// Returns true when `c` is accepted by the set (negation via '^' included).
match_bracket_class :: proc(ms: ^Match_State, c: rune, p, ec: int) -> (sig: bool, err: Error) {
	sig = true

	// Step over the opening '[' so that it is not treated as a set member.
	p := p + 1

	if p < len(ms.pattern) && ms.pattern[p] == '^' {
		// negated set; the '^' itself is not a member either
		sig = false
		p += 1
	}

	// while inside of the set
	for p < ec {
		char := utf8_advance(ms.pattern, &p) or_return

		if char == L_ESC {
			// e.g. %a or an escaped literal such as %]
			next := utf8_advance(ms.pattern, &p) or_return

			if match_class(c, next) {
				return
			}
		} else {
			next, next_size := utf8_peek(ms.pattern[p:]) or_return

			// A '-' forms a range only when a range end exists before the
			// closing ']'; otherwise (e.g. "[a-]") it is a literal '-'.
			if next == '-' && p + next_size < ec {
				p += next_size
				last := utf8_advance(ms.pattern, &p) or_return

				if char <= c && c <= last {
					return
				}
			} else if char == c {
				return
			}
		}
	}

	// nothing in the set matched
	sig = !sig
	return
}
// Match the single pattern item in pattern[p:ep] against the source character
// at byte index `s`. Returns whether it matched and the byte size of the
// source character. At the end of the source nothing matches.
single_match :: proc(ms: ^Match_State, s, p, ep: int) -> (matched: bool, schar_size: int, err: Error) {
	if s >= len(ms.src) {
		return
	}

	pattern_rune, pattern_size := utf8_peek(ms.pattern[p:]) or_return
	source_rune, source_size := utf8_peek(ms.src[s:]) or_return
	schar_size = source_size

	if pattern_rune == '.' {
		// any character
		matched = true
	} else if pattern_rune == L_ESC {
		class_rune, _ := utf8_peek(ms.pattern[p + pattern_size:]) or_return
		matched = match_class(source_rune, class_rune)
	} else if pattern_rune == '[' {
		// ep - 1 is the index of the closing ']'
		matched = match_bracket_class(ms, source_rune, p, ep - 1) or_return
	} else {
		matched = source_rune == pattern_rune
	}

	return
}
// Implements `%bxy`: `p` is the pattern index of the opening delimiter x,
// followed by the closing delimiter y. If the source at `s` starts with x,
// returns the source index just past the balancing y; otherwise INVALID.
// Fails with .Invalid_Pattern_Capture when the two delimiters are missing.
match_balance :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
	if p >= len(ms.pattern) - 1 {
		return INVALID, .Invalid_Pattern_Capture
	}

	// At the end of the source there is nothing to balance. This is a plain
	// "no match" so that callers can keep backtracking; decoding an empty
	// string here would wrongly surface as .Rune_Error.
	if s >= len(ms.src) {
		return INVALID, .OK
	}

	schar, ssize := utf8_peek(ms.src[s:]) or_return
	pchar, psize := utf8_peek(ms.pattern[p:]) or_return

	// the source has to start with the opening delimiter
	if schar != pchar {
		return INVALID, .OK
	}

	cont := 1
	s := s + ssize
	begin := pchar
	end, _ := utf8_peek(ms.pattern[p + psize:]) or_return

	for s < len(ms.src) {
		ch := utf8_advance(ms.src, &s) or_return

		// `end` is tested first so that identical delimiters (%b__) close
		switch ch {
		case end:
			cont -= 1

			if cont == 0 {
				return s, .OK
			}

		case begin:
			cont += 1
		}
	}

	return INVALID, .OK
}
// Greedy repetition ('*' and '+'): consume as many repetitions of the item in
// pattern[p:ep] as possible starting at `s`, then back off one character at a
// time until the rest of the pattern (after the repetition symbol) matches.
max_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) {
	furthest := s

	// consume greedily
	for {
		hit, width := single_match(ms, furthest, p, ep) or_return
		if !hit {
			break
		}
		furthest += width
	}

	// try the remainder of the pattern, giving characters back on failure
	for furthest >= s {
		tail := match(ms, furthest, ep + 1) or_return
		if tail != INVALID {
			return tail, .OK
		}

		if furthest == s {
			break
		}

		furthest = utf8_prev(ms.src, s, furthest)
	}

	return INVALID, .OK
}
// Lazy repetition ('-'): try the rest of the pattern first and only consume
// one more repetition of the item in pattern[p:ep] when that fails.
min_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) {
	pos := s

	for {
		tail := match(ms, pos, ep + 1) or_return
		if tail != INVALID {
			return tail, .OK
		}

		// the rest did not match here: consume one more character if possible
		hit, width := single_match(ms, pos, p, ep) or_return
		if !hit {
			return INVALID, .OK
		}

		pos += width
	}
}
// Open a new capture at source index `s` and continue matching at pattern
// index `p`. `what` is the initial length marker (CAP_UNFINISHED or
// CAP_POSITION). The capture is released again when the rest does not match.
// Fails with .OOB when the pattern opens more than MAX_CAPTURES captures.
start_capture :: proc(ms: ^Match_State, s, p, what: int) -> (res: int, err: Error) {
	level := ms.level

	// ms.capture is a fixed array; refuse instead of indexing past its end
	if level >= MAX_CAPTURES {
		return INVALID, .OOB
	}

	ms.capture[level].init = s
	ms.capture[level].len = what
	ms.level += 1

	res = match(ms, s, p) or_return
	if res == INVALID {
		// undo the capture
		ms.level -= 1
	}

	return
}
// Close the innermost open capture at source index `s` and continue matching
// at pattern index `p`. The capture is re-opened when the rest does not match.
end_capture :: proc(ms: ^Match_State, s, p: int) -> (res: int, err: Error) {
	slot := capture_to_close(ms) or_return

	// length in bytes from where the capture was opened
	ms.capture[slot].len = s - ms.capture[slot].init

	res = match(ms, s, p) or_return
	if res != INVALID {
		return
	}

	// no match: the capture counts as open again
	ms.capture[slot].len = CAP_UNFINISHED
	return
}
// Back-reference (%1 .. %9): succeeds when the source at `s` repeats the text
// of the referenced capture, returning the index just past it; otherwise
// INVALID. `char` is the digit rune from the pattern.
match_capture :: proc(ms: ^Match_State, s: int, char: rune) -> (res: int, err: Error) {
	index := check_capture(ms, char) or_return
	init := ms.capture[index].init
	length := ms.capture[index].len

	// a position capture (negative length marker) has no text to compare
	if length < 0 {
		return INVALID, .OK
	}

	// enough source left AND the same bytes as the captured text
	if len(ms.src) - s >= length && ms.src[init:init + length] == ms.src[s:s + length] {
		return s + length, .OK
	}

	return INVALID, .OK
}
// Core recursive matcher: try to match pattern[p:] against the source starting
// at byte index `s`. Returns the source index just past the match, or INVALID
// when the pattern does not match at this position.
match :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
	s := s
	p := p

	// end of pattern: everything matched
	if p == len(ms.pattern) {
		return s, .OK
	}

	char, _ := utf8_peek(ms.pattern[p:]) or_return

	switch char {
	case '(':
		if p + 1 < len(ms.pattern) && ms.pattern[p + 1] == ')' {
			// "()" position capture
			s = start_capture(ms, s, p + 2, CAP_POSITION) or_return
		} else {
			s = start_capture(ms, s, p + 1, CAP_UNFINISHED) or_return
		}

	case ')':
		s = end_capture(ms, s, p + 1) or_return

	case '$':
		// '$' only anchors when it is the last pattern character
		if p + 1 != len(ms.pattern) {
			return match_default(ms, s, p)
		}

		if len(ms.src) != s {
			s = INVALID
		}

	case L_ESC:
		// stop short patterns like "%" only
		if p + 1 >= len(ms.pattern) {
			err = .OOB
			return
		}

		switch ms.pattern[p + 1] {
		// balanced string
		case 'b':
			s = match_balance(ms, s, p + 2) or_return

			if s != INVALID {
				// Continue after both delimiters. They may be multi-byte
				// characters, so measure them instead of assuming 2 bytes.
				next := p + 2
				utf8_advance(ms.pattern, &next) or_return
				utf8_advance(ms.pattern, &next) or_return
				return match(ms, s, next)
			}

		// frontier
		case 'f':
			p += 2

			// "%f" must be followed by a set; also guards a pattern ending in "%f"
			if p >= len(ms.pattern) || ms.pattern[p] != '[' {
				return INVALID, .Invalid_Pattern_Capture
			}

			ep := class_end(ms, p) or_return
			previous, current: rune

			// character before `s`; stays 0 at the start of the source
			if s != 0 {
				temp := utf8_prev(ms.src, 0, s)
				previous, _ = utf8_peek(ms.src[temp:]) or_return
			}

			// character at `s`; stays 0 at the end of the source
			if s != len(ms.src) {
				current, _ = utf8_peek(ms.src[s:]) or_return
			}

			m1 := match_bracket_class(ms, previous, p, ep - 1) or_return
			m2 := match_bracket_class(ms, current, p, ep - 1) or_return

			// frontier: previous is outside the set, current is inside
			if !m1 && m2 {
				return match(ms, s, ep)
			}

			s = INVALID

		// back-reference; inclusive range so that %9 is handled as well
		// (%0 is rejected by check_capture)
		case '0'..='9':
			s = match_capture(ms, s, rune(ms.pattern[p + 1])) or_return

			if s != INVALID {
				return match(ms, s, p + 2)
			}

		case: return match_default(ms, s, p)
		}

	case:
		return match_default(ms, s, p)
	}

	return s, .OK
}
// Match one pattern item (character, class or set) at pattern index `p`
// together with its optional repetition suffix ('?', '+', '*', '-'), then
// continue with the rest of the pattern.
match_default :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
	s := s
	ep := class_end(ms, p) or_return
	hit, width := single_match(ms, s, p, ep) or_return

	// byte following the item, 0 when the pattern ends there
	suffix: byte = 0
	if ep < len(ms.pattern) {
		suffix = ms.pattern[ep]
	}

	if hit {
		switch suffix {
		case '?':
			// optional: prefer consuming the character
			with_char := match(ms, s + width, ep + 1) or_return
			if with_char == INVALID {
				return match(ms, s, ep + 1)
			}
			s = with_char

		case '+':
			// one occurrence is already consumed
			s = max_expand(ms, s + width, p, ep) or_return

		case '*':
			s = max_expand(ms, s, p, ep) or_return

		case '-':
			s = min_expand(ms, s, p, ep) or_return

		case:
			// no suffix: consume the character and go on
			return match(ms, s + width, ep)
		}
	} else {
		switch suffix {
		case '*', '?', '-':
			// these suffixes accept zero occurrences
			return match(ms, s, ep + 1)

		case:
			s = INVALID
		}
	}

	return s, .OK
}
// Write capture `i` of the match state into matches[i] as a byte range.
// For i == 0 without any capture in the state, the range { 0, e - s } is
// written instead. Open captures and out-of-range indices are errors.
push_onecapture :: proc(ms: ^Match_State, i: int, s: int, e: int, matches: []Match) -> (err: Error) {
	if i >= ms.level {
		if i != 0 {
			return .Invalid_Capture_Index
		}

		matches[0] = { 0, e - s }
		return
	}

	slot := ms.capture[i]

	switch slot.len {
	case CAP_UNFINISHED:
		err = .Unfinished_Capture
	case CAP_POSITION:
		matches[i] = { slot.init, slot.init + 1 }
	case:
		matches[i] = { slot.init, slot.init + slot.len }
	}

	return
}
// Copy all captures of the match state into `matches` and return how many
// were written. When the state holds no capture and `s` is not -1, a single
// whole-match entry is produced.
// Fails with .OOB when `matches` is too small to hold every capture.
push_captures :: proc(
	ms: ^Match_State,
	s: int,
	e: int,
	matches: []Match,
) -> (nlevels: int, err: Error) {
	nlevels = 1 if ms.level == 0 && s != -1 else ms.level

	// report an error instead of writing past the end of the destination
	if nlevels > len(matches) {
		return 0, .OOB
	}

	for i in 0..<nlevels {
		push_onecapture(ms, i, s, e, matches) or_return
	}

	return
}
// SPECIALS := "^$*+?.([%-"
// Lookup table indexed by byte value: true for every pattern character listed
// above. index_special uses it to decide whether a pattern contains any
// special character at all.
SPECIALS_TABLE := [256]bool {
'^' = true,
'$' = true,
'*' = true,
'+' = true,
'?' = true,
'.' = true,
'(' = true,
'[' = true,
'%' = true,
'-' = true,
}
// Return the byte index of the first special pattern character in `text`
// (according to SPECIALS_TABLE), or -1 when there is none.
index_special :: proc(text: string) -> int {
	for i := 0; i < len(text); i += 1 {
		if !SPECIALS_TABLE[text[i]] {
			continue
		}
		return i
	}

	return -1
}
// Plain substring search: return the byte index of the first occurrence of
// `s2` inside `s1`, or -1 when it does not occur. An empty `s2` is found at 0.
lmem_find :: proc(s1, s2: string) -> int {
	hay_len := len(s1)
	needle_len := len(s2)

	if needle_len == 0 {
		return 0
	}
	if needle_len > hay_len {
		return -1
	}

	first := s2[0]

	// jump from candidate to candidate using the first needle byte
	start := strings.index_byte(s1, first)
	for start != -1 && start + needle_len <= hay_len {
		if s1[start:start + needle_len] == s2 {
			return start
		}

		skip := strings.index_byte(s1[start + 1:], first)
		if skip == -1 {
			return -1
		}

		start += 1 + skip
	}

	return -1
}
// Find `pattern` within `haystack`, starting the search at byte `offset`.
// On success matches[0] holds the byte range of the whole match, matches[1:]
// hold the captures, and `captures` is the number of entries written.
// `captures` is 0 when nothing was found.
//
// allow_memfind speeds up patterns without special characters by using a
// plain substring search.
//
// Errors: .OOB when `offset` lies outside of the haystack, .Match_Invalid when
// the match found is empty (does not advance), plus any pattern error.
find_aux :: proc(
	haystack: string,
	pattern: string,
	offset: int,
	allow_memfind: bool,
	matches: ^[MAX_CAPTURES]Match,
) -> (captures: int, err: Error) {
	// slicing with an offset outside of the haystack would panic below
	if offset < 0 || offset > len(haystack) {
		err = .OOB
		return
	}

	s := offset
	p := 0

	specials_idx := index_special(pattern)
	if allow_memfind && specials_idx == -1 {
		if index := lmem_find(haystack[s:], pattern); index != -1 {
			matches[0] = { index + s, index + s + len(pattern) }
			captures = 1
		}
		return
	}

	pattern := pattern
	anchor: bool
	if len(pattern) > 0 && pattern[0] == '^' {
		anchor = true
		pattern = pattern[1:]
	}

	ms := Match_State {
		src = haystack,
		pattern = pattern,
	}

	for {
		res := match(&ms, s, p) or_return

		if res != INVALID {
			// disallow non advancing match
			if s == res {
				err = .Match_Invalid
			}

			// NOTE(Skytrias): first result is reserved for a full match
			matches[0] = { s, res }

			// rest are the actual captures
			captures = push_captures(&ms, -1, -1, matches[1:]) or_return
			captures += 1
			return
		}

		// an anchored pattern is only tried at the start position
		if anchor {
			break
		}

		// Advance by one whole character. Stepping a single byte would land
		// inside a multi-byte character and make the next attempt fail with
		// .Rune_Error instead of continuing the search.
		_, size := utf8.decode_rune_in_string(ms.src[s:])
		s += max(size, 1)

		if s >= len(ms.src) {
			break
		}
	}

	return
}
// Iterator-style matching: finds the next match in `haystack^`, returns the
// first capture (or the whole match when the pattern has no captures) and
// shortens `haystack^` to the text after the match.
// Further captures have to be read from `captures`.
gmatch :: proc(
	haystack: ^string,
	pattern: string,
	captures: ^[MAX_CAPTURES]Match,
) -> (res: string, ok: bool) {
	text := haystack^
	if len(text) == 0 {
		return
	}

	count, err := find_aux(text, pattern, 0, false, captures)
	if count == 0 || err != .OK {
		return
	}

	// slot 0 is the whole match, slot 1 the first capture
	slot := 0
	if count > 1 {
		slot = 1
	}

	hit := captures[slot]
	haystack^ = text[hit.byte_end:]
	return text[hit.byte_start:hit.byte_end], true
}
// Replace every match of `pattern` in `haystack` with `replace`, writing the
// result into `builder`. Returns the builder content as a string, or an empty
// string when a match attempt reported an error together with a result.
gsub_builder :: proc(
	builder: ^strings.Builder,
	haystack: string,
	pattern: string,
	replace: string,
) -> string {
	captures: [MAX_CAPTURES]Match
	rest := haystack

	for {
		count, err := find_aux(rest, pattern, 0, false, &captures)

		// no further match
		if count == 0 {
			break
		}

		if err != .OK {
			return ""
		}

		whole := captures[0]

		// text in front of the match, then the replacement
		strings.write_string(builder, rest[:whole.byte_start])
		strings.write_string(builder, replace)

		// continue behind the match
		rest = rest[whole.byte_end:]
	}

	// whatever is left after the last match
	strings.write_string(builder, rest)
	return strings.to_string(builder^)
}
// Replace every match of `pattern` in `haystack` with `replace`.
// The text is assembled in a builder backed by context.temp_allocator and the
// final result is then cloned with `allocator`, so the returned string is
// owned by the allocator the caller asked for.
gsub_allocator :: proc(
	haystack: string,
	pattern: string,
	replace: string,
	allocator := context.allocator,
) -> string {
	builder := strings.builder_make(0, 256, context.temp_allocator)
	temp := gsub_builder(&builder, haystack, pattern, replace)

	// previously `allocator` was ignored and the temp-backed string returned
	return strings.clone(temp, allocator)
}
// Callback signature used by gsub_with; it is invoked once per match.
Gsub_Proc :: proc(
// optional passed data
data: rawptr,
// word match found
word: string,
// current haystack for found captures
haystack: string,
// found captures - empty for no captures
captures: []Match,
)
// Call `call` for every match of `pattern` in `haystack`. The callback gets
// `data`, the matched word, the haystack the match positions refer to and the
// captures of that match. Iteration stops at the first failed or erroneous
// match attempt.
gsub_with :: proc(
	haystack: string,
	pattern: string,
	data: rawptr,
	call: Gsub_Proc,
) {
	captures: [MAX_CAPTURES]Match
	rest := haystack

	for {
		count, err := find_aux(rest, pattern, 0, false, &captures)
		if count == 0 || err != .OK {
			return
		}

		whole := captures[0]
		call(data, rest[whole.byte_start:whole.byte_end], rest, captures[1:count])

		// continue behind the match
		rest = rest[whole.byte_end:]
	}
}
// Overload set: gsub with an explicit builder or with an allocator.
gsub :: proc { gsub_builder, gsub_allocator }
// Iterator-style find: finds the next match in `haystack^`, returns the whole
// match (slot 0) and shortens `haystack^` to the text after the match.
// Patterns without special characters use the plain substring search.
gfind :: proc(
	haystack: ^string,
	pattern: string,
	captures: ^[MAX_CAPTURES]Match,
) -> (res: string, ok: bool) {
	text := haystack^
	if len(text) == 0 {
		return
	}

	count, err := find_aux(text, pattern, 0, true, captures)
	if count == 0 || err != .OK {
		return
	}

	whole := captures[0]
	haystack^ = text[whole.byte_end:]
	return text[whole.byte_start:whole.byte_end], true
}
// Rebuild `pattern` into a case insensitive pattern by replacing every letter
// with a two character set, e.g. "test" becomes "[tT][eE][sS][tT]".
// A letter directly after the escape character '%' is a class name (%a, %d..)
// and is copied unchanged.
// NOTE(review): letters that already sit inside a `[...]` set are expanded as
// well, which looks like it would produce a nested set - confirm before
// feeding such patterns through this procedure.
pattern_case_insensitive_builder :: proc(
	builder: ^strings.Builder,
	pattern: string,
) -> (res: string) {
	p := pattern
	last_percent: bool

	for len(p) > 0 {
		char, size := utf8.decode_rune_in_string(p)

		if unicode.is_alpha(char) && !last_percent {
			// write character class in manually
			strings.write_byte(builder, '[')
			strings.write_rune(builder, unicode.to_lower(char))
			strings.write_rune(builder, unicode.to_upper(char))
			strings.write_byte(builder, ']')
		} else {
			strings.write_rune(builder, char)
		}

		// A '%' only escapes the next character when it is not escaped
		// itself: in "%%a" the 'a' is a plain letter after a literal percent.
		last_percent = char == L_ESC && !last_percent
		p = p[size:]
	}

	return strings.to_string(builder^)
}
// Rebuild `pattern` into a case insensitive pattern.
// The pattern is assembled in a builder backed by context.temp_allocator
// (initial capacity `cap`) and the result is then cloned with `allocator`,
// so the returned string is owned by the allocator the caller asked for.
pattern_case_insensitive_allocator :: proc(
	pattern: string,
	cap: int = 256,
	allocator := context.allocator,
) -> (res: string) {
	builder := strings.builder_make(0, cap, context.temp_allocator)
	temp := pattern_case_insensitive_builder(&builder, pattern)

	// previously `allocator` was ignored and the temp-backed string returned
	return strings.clone(temp, allocator)
}
// Overload set: case insensitive pattern rebuild with an explicit builder or with an allocator.
pattern_case_insensitive :: proc { pattern_case_insensitive_builder, pattern_case_insensitive_allocator }
// Matcher helper struct that stores optional data you might want to use or not.
// As Lua is far more dynamic, this helps with handling the larger amount of data,
// and it allows the use of find/match/gmatch through one struct.
Matcher :: struct {
// text to search in and pattern to search for, as given to matcher_init
haystack: string,
pattern: string,
// result of the last find_aux call: slot 0 is the whole match, the rest are captures
captures: [MAX_CAPTURES]Match,
// number of valid entries in `captures` after the last call
captures_length: int,
// byte offset passed to find_aux as the search start
offset: int,
// error reported by the last find_aux call
err: Error,
// changing content for iterators
// remaining, not yet consumed part of the haystack
iter: string,
// number of matches returned by the iterator so far
iter_index: int,
}
// Create a Matcher for `haystack` and `pattern` with an optional byte offset
// at which searching starts. The iterator state begins at the full haystack.
matcher_init :: proc(haystack, pattern: string, offset: int = 0) -> (res: Matcher) {
	return Matcher {
		haystack = haystack,
		pattern = pattern,
		offset = offset,
		iter = haystack,
	}
}
// Find the first match and return its byte start / end position in the
// haystack; `ok` is true on success. Patterns without special characters use
// the plain substring search. Result count and error are stored in the matcher.
matcher_find :: proc(matcher: ^Matcher) -> (start, end: int, ok: bool) #no_bounds_check {
	count, err := find_aux(matcher.haystack, matcher.pattern, matcher.offset, true, &matcher.captures)
	matcher.captures_length = count
	matcher.err = err

	whole := matcher.captures[0]
	return whole.byte_start, whole.byte_end, count > 0 && err == .OK
}
// Find the first match and return the matched word; `ok` is true on success.
// Always runs the pattern matcher (no substring shortcut). Result count and
// error are stored in the matcher.
matcher_match :: proc(matcher: ^Matcher) -> (word: string, ok: bool) #no_bounds_check {
	count, err := find_aux(matcher.haystack, matcher.pattern, matcher.offset, false, &matcher.captures)
	matcher.captures_length = count
	matcher.err = err

	whole := matcher.captures[0]
	return matcher.haystack[whole.byte_start:whole.byte_end], count > 0 && err == .OK
}
// Get the text of capture `index` (0 is the first capture of the pattern).
// Slot 0 of the capture array is reserved for the whole match, so the capture
// is read from slot index + 1. Panics with a bounds check error at the caller
// location when that slot lies outside of the capture array.
matcher_capture :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> string #no_bounds_check {
	// the count is the array length: slots 0 ..< MAX_CAPTURES are valid
	runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES)
	cap := matcher.captures[index + 1]
	return matcher.haystack[cap.byte_start:cap.byte_end]
}
// Get the raw byte range of capture `index` (0 is the first capture of the
// pattern), skipping slot 0 which holds the whole match. Panics with a bounds
// check error at the caller location when the slot lies outside of the array.
matcher_capture_raw :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> Match #no_bounds_check {
	// the count is the array length: slots 0 ..< MAX_CAPTURES are valid
	runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES)
	return matcher.captures[index + 1]
}
// alias
matcher_gmatch :: matcher_match_iter

// Iteratively match the haystack until no further match is found.
// Returns the first capture (or the whole match when the pattern has no
// captures), the running match index and true while matches are found.
// Byte positions stored in matcher.captures refer to the remaining text
// (matcher.iter) as it was before this call.
matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: bool) {
	if len(matcher.iter) == 0 {
		return
	}

	// The start offset belongs to the original haystack. Once a match has been
	// consumed, `iter` already begins behind it, so the offset must not be
	// applied a second time.
	offset := matcher.offset if matcher.iter_index == 0 else 0

	matcher.captures_length, matcher.err = find_aux(
		matcher.iter,
		matcher.pattern,
		offset,
		false,
		&matcher.captures,
	)

	if matcher.captures_length != 0 && matcher.err == .OK {
		ok = true
		first := matcher.captures_length > 1 ? 1 : 0
		match := matcher.captures[first]

		// output
		res = matcher.iter[match.byte_start:match.byte_end]
		index = matcher.iter_index

		// advance
		matcher.iter_index += 1
		matcher.iter = matcher.iter[match.byte_end:]
	}

	return
}
// Get a slice of all valid captures above the first (whole) match.
// Returns an empty slice when the last search found nothing or the pattern
// has no captures.
matcher_captures_slice :: proc(matcher: ^Matcher) -> []Match {
	// captures[1:0] would be an invalid slice when nothing matched
	if matcher.captures_length <= 1 {
		return nil
	}

	return matcher.captures[1:matcher.captures_length]
}

View File

@@ -2678,8 +2678,8 @@ gb_internal bool are_types_identical_internal(Type *x, Type *y, bool check_tuple
x->Proc.variadic == y->Proc.variadic &&
x->Proc.diverging == y->Proc.diverging &&
x->Proc.optional_ok == y->Proc.optional_ok &&
are_types_identical(x->Proc.params, y->Proc.params) &&
are_types_identical(x->Proc.results, y->Proc.results);
are_types_identical_internal(x->Proc.params, y->Proc.params, check_tuple_names) &&
are_types_identical_internal(x->Proc.results, y->Proc.results, check_tuple_names);
case Type_Map:
return are_types_identical(x->Map.key, y->Map.key) &&

View File

@@ -2,7 +2,7 @@ ODIN=../../odin
PYTHON=$(shell which python3)
all: download_test_assets image_test compress_test strings_test hash_test crypto_test noise_test encoding_test \
math_test linalg_glsl_math_test filepath_test reflect_test os_exit_test i18n_test c_libc_test
math_test linalg_glsl_math_test filepath_test reflect_test os_exit_test i18n_test match_test c_libc_test
download_test_assets:
$(PYTHON) download_assets.py
@@ -49,5 +49,8 @@ os_exit_test:
i18n_test:
$(ODIN) run text/i18n -out:test_core_i18n
match_test:
$(ODIN) run text/match -out:test_core_match
c_libc_test:
$(ODIN) run c/libc -out:test_core_libc
$(ODIN) run c/libc -out:test_core_libc

View File

@@ -71,6 +71,11 @@ echo Running core:text/i18n tests
echo ---
%PATH_TO_ODIN% run text\i18n %COMMON% -out:test_core_i18n.exe
echo ---
rem The package under test is core:text/match (the Makefile runs text/match as well).
echo Running core:text/match tests
echo ---
%PATH_TO_ODIN% run text\match %COMMON% -out:test_core_lua_strlib.exe
echo ---
echo Running core:slice tests
echo ---

View File

@@ -0,0 +1,396 @@
package test_strlib
import "core:text/match"
import "core:testing"
import "core:fmt"
import "core:os"
import "core:io"
// totals over all checks, reported by main
TEST_count: int
TEST_fail: int

// Record the outcome of one check. Returns true when the check FAILED, so that
// callers can log details inside `if failed(...)`. On failure the caller
// location is written to the test writer and the error counters are bumped.
failed :: proc(t: ^testing.T, ok: bool, loc := #caller_location) -> bool {
	TEST_count += 1

	if ok {
		return false
	}

	fmt.wprintf(t.w, "%v: ", loc)
	t.error_count += 1
	TEST_fail += 1
	return true
}
// shorthand for the standard expectation helper
expect :: testing.expect
// formatted output to the writer of the test context
logf :: proc(t: ^testing.T, format: string, args: ..any) {
fmt.wprintf(t.w, format, ..args)
}
// matcher_find has to report the expected byte offsets for each table entry
@test
test_find :: proc(t: ^testing.T) {
	Expected :: struct {
		start, end: int, // expected start/end
		ok: bool,
	}

	Entry :: struct {
		s, p: string,
		offset: int,
		match: Expected,
	}

	ENTRIES :: [?]Entry {
		{ "", "", 0, { 0, 0, true } },
		{ "alo", "", 0, { 0, 0, true } },
		{ "a o a o a o", "a", 0, { 0, 1, true } },
		{ "a o a o a o", "a o", 1, { 4, 7, true } },
		{ "alo123alo", "12", 0, { 3, 5, true } },
		{ "alo123alo", "^12", 0, {} },

		// from https://riptutorial.com/lua/example/20535/string-find--introduction-
		{ "137'5 m47ch s0m3 d1g175", "m%d%d", 0, { 6, 9, true } },
		{ "stack overflow", "[abc]", 0, { 2, 3, true } },
		{ "stack overflow", "[^stack ]", 0, { 6, 7, true } },
		{ "hello", "o%d?", 0, { 4, 5, true } },
		{ "hello20", "o%d?", 0, { 4, 6, true } },
		{ "helllllo", "el+", 0, { 1, 7, true } },
		{ "heo", "el+", 0, {} },
		{ "helelo", "h.+l", 0, { 0, 5, true } },
		{ "helelo", "h.-l", 0, { 0, 3, true } },
	}

	for entry, i in ENTRIES {
		m := match.matcher_init(entry.s, entry.p, entry.offset)
		got_start, got_end, got_ok := match.matcher_find(&m)
		want := entry.match

		same := want.ok == got_ok && got_start == want.start && got_end == want.end
		if !failed(t, same) {
			continue
		}

		logf(t, "Find %d failed!\n", i)
		logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p)
		logf(t, "\tSTART: %d == %d?\n", want.start, got_start)
		logf(t, "\tEND: %d == %d?\n", want.end, got_end)
		logf(t, "\tErr: %v\tLength %d\n", m.err, m.captures_length)
	}
}
// matcher_match has to return the expected word for each table entry
@test
test_match :: proc(t: ^testing.T) {
	Entry :: struct {
		s, p: string,
		result: string, // expected matched text
		ok: bool,
	}

	ENTRIES :: [?]Entry {
		// star
		{ "aaab", ".*b", "aaab", true },
		{ "aaa", ".*a", "aaa", true },
		{ "b", ".*b", "b", true },

		// plus
		{ "aaab", ".+b", "aaab", true },
		{ "aaa", ".+a", "aaa", true },
		{ "b", ".+b", "", false },

		// question
		{ "aaab", ".?b", "ab", true },
		{ "aaa", ".?a", "aa", true },
		{ "b", ".?b", "b", true },

		// CLASSES, checking shortened invalid patterns
		{ "a", "%", "", false },

		// %a letter (A-Z, a-z)
		{ "letterS", "%a+", "letterS", true },
		{ "Let123", "%a+", "Let", true },
		{ "Let123", "%A+", "123", true },

		// %c control characters (\n, \t, \r)
		{ "\n", "%c", "\n", true },
		{ "\t", "%c", "\t", true },
		{ "\t", "%C", "", false },
		{ "a", "%C", "a", true },

		// %d digit characters (0-9)
		{ "0123", "%d+", "0123", true },
		{ "abcd", "%D+", "abcd", true },
		{ "ab23", "%d+", "23", true },

		// %l lower characters (a-z)
		{ "lowerCASE", "%l+", "lower", true },
		{ "LOWERcase", "%l+", "case", true },
		{ "LOWERcase", "%L+", "LOWER", true },

		// %p punctuation characters (!, ?, &, ...)
		{ "!?&", "%p+", "!?&", true },
		{ "abc!abc", "%p", "!", true },
		{ "!abc!", "%P+", "abc", true },

		// %s space characters
		{ " ", "%s", " ", true },
		{ "a", "%S", "a", true },
		{ "abc ", "%s+", " ", true },

		// %u upper characters (A-Z)
		{ "lowerCASE", "%u+", "CASE", true },
		{ "LOWERcase", "%u+", "LOWER", true },
		{ "LOWERcase", "%U+", "case", true },

		// %w alpha numeric (A-Z, a-z, 0-9)
		{ "0123", "%w+", "0123", true },
		{ "abcd", "%W+", "", false },
		{ "ab23", "%w+", "ab23", true },

		// %x hexadecimal digits (0x1A, ...)
		{ "3", "%x", "3", true },
		{ "9f", "%x+", "9f", true },
		{ "9g", "%x+", "9", true },
		{ "9g", "%X+", "g", true },

		// random tests
		{ "f123", "%D", "f", true },
		{ "f123", "%d", "1", true },
		{ "f123", "%d+", "123", true },
		{ "foo 123 bar", "%d%d%d", "123", true },
		{ "Uppercase", "%u", "U", true },
		{ "abcd", "[bc][bc]", "bc", true },
		{ "abcd", "[^ad]", "b", true },
		{ "123", "[0-9]", "1", true },

		// end of line
		{ "testing this", "this$", "this", true },
		{ "testing this ", "this$", "", false },
		{ "testing this$", "this%$$", "this$", true },

		// start of line
		{ "testing this", "^testing", "testing", true },
		{ " testing this", "^testing", "", false },
		{ "testing this", "^%w+", "testing", true },
		{ " testing this", "^%w+", "", false },

		// balanced string %b
		{ "testing (this) out", "%b()", "(this)", true },
		{ "testing athisz out", "%baz", "athisz", true },
		{ "testing _this_ out", "%b__", "_this_", true },
		{ "testing _this_ out", "%b_", "", false },
	}

	for entry, i in ENTRIES {
		m := match.matcher_init(entry.s, entry.p)
		got, got_ok := match.matcher_match(&m)

		same := entry.ok == got_ok && got == entry.result
		if !failed(t, same) {
			continue
		}

		logf(t, "Match %d failed!\n", i)
		logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p)
		logf(t, "\tResults: WANTED %s\tGOT %s\n", entry.result, got)
		logf(t, "\tErr: %v\tLength %d\n", m.err, m.captures_length)
	}
}
// captures: number of results, captured text and plain match / no match checks
@test
test_captures :: proc(t: ^testing.T) {
	Temp :: struct {
		pattern: string,
		captures: [match.MAX_CAPTURES]match.Match,
	}

	// every result slot (whole match first, then captures) has to equal `comp`
	compare_captures :: proc(t: ^testing.T, test: ^Temp, haystack: string, comp: []string, loc := #caller_location) {
		count, err := match.find_aux(haystack, test.pattern, 0, false, &test.captures)

		if failed(t, len(comp) == count) {
			logf(t, "Captures Compare Failed -> Lengths %d != %d\n", len(comp), count)
		}

		for slot in 0..<count {
			range := test.captures[slot]
			got := haystack[range.byte_start:range.byte_end]

			if failed(t, comp[slot] == got) {
				logf(t, "Capture don't equal -> %s != %s\n", comp[slot], got)
			}
		}
	}

	// the pattern has to match (or not match) the haystack as stated by `ok`
	matches :: proc(t: ^testing.T, test: ^Temp, haystack: string, ok: bool, loc := #caller_location) {
		count, err := match.find_aux(haystack, test.pattern, 0, false, &test.captures)
		found := count > 0 && err == .OK

		if failed(t, found == ok) {
			logf(t, "Capture match failed!\n")
			logf(t, "\tErr: %v\n", err)
			logf(t, "\tLength: %v\n", count)
		}
	}

	temp := Temp { pattern = "(one).+" }
	compare_captures(t, &temp, " one two", { "one two", "one" })
	compare_captures(t, &temp, "three", {})

	matches(t, &temp, "one dog", true)
	matches(t, &temp, "dog one ", true)
	matches(t, &temp, "dog one", false)

	temp.pattern = "^(%a+)"
	matches(t, &temp, "one dog", true)
	matches(t, &temp, " one dog", false)

	// multiple captures
	{
		haystack := " 233 hello dolly"
		pattern := "%s*(%d+)%s+(%S+)"
		captures: [match.MAX_CAPTURES]match.Match
		match.find_aux(haystack, pattern, 0, false, &captures)

		first := captures[1]
		second := captures[2]
		first_text := haystack[first.byte_start:first.byte_end]
		second_text := haystack[second.byte_start:second.byte_end]

		expect(t, first_text == "233", "Multi-Capture failed at 1")
		expect(t, second_text == "hello", "Multi-Capture failed at 2")
	}
}
// compare the iteration result `b` against the expected entry a[index]
gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) {
	want := a[index]

	if !failed(t, want == b) {
		return
	}

	logf(t, "GMATCH %d failed!\n", index)
	logf(t, "\t%s != %s\n", want, b)
}
// iterate over all matches of a pattern and compare them in order
@test
test_gmatch :: proc(t: ^testing.T) {
	// words
	{
		m := match.matcher_init("testing this out 123", "%w+")
		want := [?]string { "testing", "this", "out", "123" }

		for word, i in match.matcher_gmatch(&m) {
			gmatch_check(t, i, want[:], word)
		}
	}

	// pairs of hex digits
	{
		m := match.matcher_init("#afdde6", "%x%x")
		want := [?]string { "af", "dd", "e6" }

		for word, i in match.matcher_gmatch(&m) {
			gmatch_check(t, i, want[:], word)
		}
	}

	// with a capture the iterator yields the capture, not the whole match
	{
		m := match.matcher_init("testing outz captures yo outz outtz", "(out)z")
		want := [?]string { "out", "out" }

		for word, i in match.matcher_gmatch(&m) {
			gmatch_check(t, i, want[:], word)
		}
	}
}
// replace all matches of a pattern with a fixed text
@test
test_gsub :: proc(t: ^testing.T) {
	digits_replaced := match.gsub("testing123testing", "%d+", " sup ", context.temp_allocator)
	expect(t, digits_replaced == "testing sup testing", "GSUB 0: failed")

	letters_replaced := match.gsub("testing123testing", "%a+", "345", context.temp_allocator)
	expect(t, letters_replaced == "345123345", "GSUB 1: failed")
}
// gfind has to walk through all words of the haystack in order
@test
test_gfind :: proc(t: ^testing.T) {
	haystack := "test1 123 test2 123 test3"
	pattern := "%w+"
	captures: [match.MAX_CAPTURES]match.Match
	want := [?]string { "test1", "123", "test2", "123", "test3" }

	// gfind shortens the haystack through this pointer
	rest := &haystack
	seen: int

	for word in match.gfind(rest, pattern, &captures) {
		if failed(t, want[seen] == word) {
			logf(t, "GFIND %d failed!\n", seen)
			logf(t, "\t%s != %s\n", want[seen], word)
		}

		seen += 1
	}
}
// frontier pattern %f: only fully upper case words are reported by gsub_with
@test
test_frontier :: proc(t: ^testing.T) {
	Temp :: struct {
		t: ^testing.T,
		index: int,
		output: [3]string,
	}

	// callback: every reported word has to equal the next expected word
	on_word :: proc(data: rawptr, word: string, haystack: string, captures: []match.Match) {
		state := cast(^Temp) data
		want := state.output[state.index]

		if failed(state.t, word == want) {
			logf(state.t, "GSUB_WITH %d failed!\n", state.index)
			logf(state.t, "\t%s != %s\n", want, word)
		}

		state.index += 1
	}

	temp := Temp {
		t = t,
		output = { "THE", "QUICK", "JUMPS" },
	}

	// https://lua-users.org/wiki/FrontierPattern example taken from here
	match.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, on_word)
}
// words made of multi-byte characters have to be matched as a whole
@test
test_utf8 :: proc(t: ^testing.T) {
	m := match.matcher_init("恥ず べき恥 フク恥ロ", "%w+")
	want := [?]string { "恥ず", "べき恥", "フク恥ロ" }

	for word, i in match.matcher_gmatch(&m) {
		gmatch_check(t, i, want[:], word)
	}
}
// every letter of the pattern has to be turned into a lower/upper set
@test
test_case_insensitive :: proc(t: ^testing.T) {
	goal := "[tT][eE][sS][tT]"
	pattern := match.pattern_case_insensitive("test", 256, context.temp_allocator)

	if failed(t, pattern == goal) {
		logf(t, "Case Insensitive Pattern doesn't match result\n")
		logf(t, "\t%s != %s\n", pattern, goal)
	}
}
// Stand-alone runner: executes every test with output on stdout, prints the
// totals and exits with status 1 when any check failed.
main :: proc() {
	w := io.to_writer(os.stream_from_handle(os.stdout))

	t: testing.T
	t.w = w

	// executed in this order
	tests := [?]proc(t: ^testing.T) {
		test_find,
		test_match,
		test_captures,
		test_gmatch,
		test_gsub,
		test_gfind,
		test_frontier,
		test_utf8,
		test_case_insensitive,
	}

	for run in tests {
		run(&t)
	}

	fmt.wprintf(w, "%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)

	if TEST_fail > 0 {
		os.exit(1)
	}
}