diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 341a09409..10f193056 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Close Stale Issues - uses: actions/stale@v4.1.0 + uses: actions/stale@v7.0.0 with: # stale-issue-message: | # Hello! @@ -36,7 +36,7 @@ jobs: # The motivation for this automation is to help prioritize issues in the backlog and not ignore, reject, or belittle anyone.. days-before-stale: 120 - days-before-close: 30 + days-before-close: -1 exempt-draft-pr: true ascending: true operations-per-run: 1000 diff --git a/core/mem/allocators.odin b/core/mem/allocators.odin index fc009621b..66da12959 100644 --- a/core/mem/allocators.odin +++ b/core/mem/allocators.odin @@ -153,7 +153,7 @@ scratch_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, s := (^Scratch_Allocator)(allocator_data) if s.data == nil { - DEFAULT_BACKING_SIZE :: 1<<22 + DEFAULT_BACKING_SIZE :: 4 * Megabyte if !(context.allocator.procedure != scratch_allocator_proc && context.allocator.data != allocator_data) { panic("cyclic initialization of the scratch allocator with itself") diff --git a/core/mem/mem.odin b/core/mem/mem.odin index f7be69adc..bc77ca287 100644 --- a/core/mem/mem.odin +++ b/core/mem/mem.odin @@ -3,11 +3,11 @@ package mem import "core:runtime" import "core:intrinsics" -Byte :: 1 -Kilobyte :: 1024 * Byte -Megabyte :: 1024 * Kilobyte -Gigabyte :: 1024 * Megabyte -Terabyte :: 1024 * Gigabyte +Byte :: runtime.Byte +Kilobyte :: runtime.Kilobyte +Megabyte :: runtime.Megabyte +Gigabyte :: runtime.Gigabyte +Terabyte :: runtime.Terabyte set :: proc "contextless" (data: rawptr, value: byte, len: int) -> rawptr { return runtime.memset(data, i32(value), len) diff --git a/core/mem/virtual/arena.odin b/core/mem/virtual/arena.odin index e901cf6f3..0ddb116fd 100644 --- a/core/mem/virtual/arena.odin +++ b/core/mem/virtual/arena.odin @@ -19,11 +19,11 @@ Arena :: 
// Opens `path` and returns its handle.
//
// Inputs:
// - path:  file or directory to open
// - flags: O_* open flags; replaced by O_RDONLY when `path` is a directory
// - mode:  permission bits; when non-zero they are re-applied with fchmod on regular files
//
// Returns the handle and 0 on success, otherwise INVALID_HANDLE and the errno of the
// call that failed.
open :: proc(path: string, flags: int = O_RDWR, mode: int = 0) -> (Handle, Errno) {
	isDir := is_dir_path(path)
	flags := flags
	if isDir {
		/*
			@INFO(Platin): To make it impossible to use the wrong flag for dir's
			as you can't write to a dir only read which makes it fail to open
		*/
		flags = O_RDONLY
	}

	cstr := strings.clone_to_cstring(path, context.temp_allocator)
	handle := _unix_open(cstr, i32(flags), u16(mode))
	if handle == -1 {
		return INVALID_HANDLE, cast(Errno)get_last_error()
	}

	/*
		@INFO(Platin): this is only done because O_CREATE for some reason fails to apply mode
		should not happen if the handle is a directory
	*/
	if mode != 0 && !isDir {
		err := fchmod(handle, cast(u16)mode)
		if err != 0 {
			// Read errno BEFORE closing the handle: the close call may overwrite it.
			// The raw result of fchmod is only used as a fallback, same as the
			// `_unix_open` failure path above reports errno rather than the call result.
			errno := cast(Errno)get_last_error()
			_unix_close(handle)
			return INVALID_HANDLE, errno if errno != 0 else cast(Errno)err
		}
	}

	return handle, 0
}
// is_graph implements the `%g` class: any visible character, i.e. everything
// that is neither white space nor a control character.
// For ASCII this is exactly the range '!'..'~'.
// (It used to be a verbatim copy of is_xdigit and therefore only accepted hex digits.)
is_graph :: proc(c: rune) -> bool {
	return !unicode.is_space(c) && !unicode.is_control(c)
}
// Tests the rune `c` against the set that starts at `ms.pattern[p]`.
//
// Inputs:
// - p:  byte index of the opening '[' in the pattern
// - ec: byte index of the closing ']' of that set (callers pass `class_end(...) - 1`)
//
// Returns `sig == true` when `c` is accepted by the set; a leading '^' inverts the result.
// `err` is set when the pattern contains an invalid rune.
match_bracket_class :: proc(ms: ^Match_State, c: rune, p, ec: int) -> (sig: bool, err: Error) {
	sig = true
	p := p

	// a '^' right after the '[' negates the set
	if p + 1 < len(ms.pattern) && ms.pattern[p + 1] == '^' {
		p += 1
		sig = false
	}

	// step over the opening '[' (or the '^'), they are syntax and not members of the set
	p += 1

	// while inside of class range
	for p < ec {
		char := utf8_advance(ms.pattern, &p) or_return

		// e.g. %a
		if char == L_ESC {
			next := utf8_advance(ms.pattern, &p) or_return

			if match_class(c, next) {
				return
			}

			continue
		}

		// `p` is at most `ec` here, which still indexes the closing ']'
		next, next_size := utf8_peek(ms.pattern[p:]) or_return

		// a '-' only forms a range when an end character follows before the closing ']',
		// in "[a-]" the '-' is a literal member
		if next == '-' && p + next_size < ec {
			// skip the '-' and read the upper bound
			p += next_size
			last := utf8_advance(ms.pattern, &p) or_return

			if char <= c && c <= last {
				return
			}
		} else if char == c {
			return
		}
	}

	// nothing in the set matched
	sig = !sig
	return
}
// Helper of `match` for back-references (`%1` .. `%9`).
// Succeeds only when the text at `s` equals the text recorded by the referenced capture.
// Returns the index just past the repeated text, or INVALID when it does not repeat.
match_capture :: proc(ms: ^Match_State, s: int, char: rune) -> (res: int, err: Error) {
	index := check_capture(ms, char) or_return
	init := ms.capture[index].init
	length := ms.capture[index].len

	// position captures `()` store a negative marker instead of a length,
	// there is no text that could be repeated
	if length < 0 {
		return INVALID, .OK
	}

	// enough input left AND the same bytes as the captured text
	if len(ms.src) - s >= length && ms.src[init:init + length] == ms.src[s:s + length] {
		return s + length, .OK
	}

	return INVALID, .OK
}

// Core recursive matcher: tries to match `ms.pattern[p:]` against `ms.src[s:]`.
// Returns the byte index in `ms.src` just past the matched text, or INVALID when the
// pattern does not match at this position. `err` reports malformed patterns.
match :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) {
	s := s
	p := p

	// the whole pattern was consumed, everything up to `s` matched
	if p == len(ms.pattern) {
		return s, .OK
	}

	// NOTE we can walk by ascii steps if we know the characters are ascii
	char, _ := utf8_peek(ms.pattern[p:]) or_return
	switch char {
	case '(':
		// "()" is a position capture, anything else opens a regular capture
		if p + 1 < len(ms.pattern) && ms.pattern[p + 1] == ')' {
			s = start_capture(ms, s, p + 2, CAP_POSITION) or_return
		} else {
			s = start_capture(ms, s, p + 1, CAP_UNFINISHED) or_return
		}

	case ')':
		s = end_capture(ms, s, p + 1) or_return

	case '$':
		// '$' is only an anchor as the very last pattern character
		if p + 1 != len(ms.pattern) {
			return match_default(ms, s, p)
		}

		if len(ms.src) != s {
			s = INVALID
		}

	case L_ESC:
		// stop short patterns like "%" only
		if p + 1 >= len(ms.pattern) {
			err = .OOB
			return
		}

		switch ms.pattern[p + 1] {
		// balanced string
		case 'b':
			s = match_balance(ms, s, p + 2) or_return

			if s != INVALID {
				// eg after %b()
				// NOTE(review): p + 4 assumes both delimiters are single byte characters
				return match(ms, s, p + 4)
			}

		// frontier
		case 'f':
			p += 2

			// "%f" has to be followed by a set, also guards a pattern that ends in "%f"
			if p >= len(ms.pattern) || ms.pattern[p] != '[' {
				return INVALID, .Invalid_Pattern_Capture
			}

			ep := class_end(ms, p) or_return
			previous, current: rune

			// get previous
			if s != 0 {
				temp := utf8_prev(ms.src, 0, s)
				previous, _ = utf8_peek(ms.src[temp:]) or_return
			}

			// get current
			if s != len(ms.src) {
				current, _ = utf8_peek(ms.src[s:]) or_return
			}

			m1 := match_bracket_class(ms, previous, p, ep - 1) or_return
			m2 := match_bracket_class(ms, current, p, ep - 1) or_return

			// transition from "not in set" to "in set"
			if !m1 && m2 {
				return match(ms, s, ep)
			}

			s = INVALID

		// back-reference to a capture group, inclusive range so that %9 is covered too
		// (%0 is rejected by check_capture with .Invalid_Capture_Index)
		case '0'..='9':
			s = match_capture(ms, s, rune(ms.pattern[p + 1])) or_return

			if s != INVALID {
				return match(ms, s, p + 2)
			}

		case: return match_default(ms, s, p)
		}

	case:
		return match_default(ms, s, p)
	}

	return s, .OK
}
// Writes capture `i` of the match state into `matches`.
// When no capture was recorded at level `i`, only `i == 0` is accepted and filled
// with `{ 0, e - s }`; any other index yields .Invalid_Capture_Index.
// A capture that was never closed yields .Unfinished_Capture.
push_onecapture :: proc(ms: ^Match_State, i: int, s: int, e: int, matches: []Match) -> (err: Error) {
	// nothing recorded at this level
	if i >= ms.level {
		if i != 0 {
			return .Invalid_Capture_Index
		}

		matches[0] = { 0, e - s }
		return
	}

	recorded := ms.capture[i]

	if recorded.len == CAP_UNFINISHED {
		return .Unfinished_Capture
	}

	// position captures span one byte, regular captures span their recorded length
	stop := recorded.init + 1 if recorded.len == CAP_POSITION else recorded.init + recorded.len
	matches[i] = { recorded.init, stop }
	return
}
// find a pattern with in a haystack with an offset
// allow_memfind will speed up simple searches
//
// Inputs:
// - haystack:      text that is searched
// - pattern:       pattern to look for, a leading '^' anchors it to `offset`
// - offset:        byte index in `haystack` where the search starts
// - allow_memfind: use a plain substring search when the pattern has no special characters
// - matches:       receives the full match at index 0, followed by the captures
//
// Returns the amount of filled `matches` entries (0 when nothing was found).
// `err` is .OOB for an offset outside of the haystack and .Match_Invalid for a match
// that did not consume any input.
find_aux :: proc(
	haystack: string,
	pattern: string,
	offset: int,
	allow_memfind: bool,
	matches: ^[MAX_CAPTURES]Match,
) -> (captures: int, err: Error) {
	// an offset outside of the haystack can not be sliced or matched
	if offset < 0 || offset > len(haystack) {
		err = .OOB
		return
	}

	s := offset
	p := 0

	specials_idx := index_special(pattern)
	if allow_memfind && specials_idx == -1 {
		// plain text pattern, no matching machinery needed
		if index := lmem_find(haystack[s:], pattern); index != -1 {
			matches[0] = { index + s, index + s + len(pattern) }
			captures = 1
		}

		return
	}

	pattern := pattern
	anchor: bool
	if len(pattern) > 0 && pattern[0] == '^' {
		anchor = true
		pattern = pattern[1:]
	}

	ms := Match_State {
		src = haystack,
		pattern = pattern,
	}

	for {
		// every attempt starts without open captures
		ms.level = 0
		res := match(&ms, s, p) or_return

		if res != INVALID {
			// disallow non advancing match
			if s == res {
				err = .Match_Invalid
			}

			// NOTE(Skytrias): first result is reserved for a full match
			matches[0] = { s, res }

			// rest are the actual captures
			captures = push_captures(&ms, -1, -1, matches[1:]) or_return
			captures += 1

			return
		}

		// retry at the start of the next rune, stepping a single byte would land
		// inside of multi byte characters and abort the search with .Rune_Error
		s = utf8_next(ms.src, s)

		// anchored patterns only get the one attempt at `offset`
		if anchor || s >= len(ms.src) {
			break
		}
	}

	return
}
// uses temp builder to build initial string - then allocates the result
//
// Inputs:
// - haystack:  text to search
// - pattern:   pattern whose matches get replaced
// - replace:   text written in place of every match
// - allocator: owns the returned string
//
// Returns the replaced text, allocated with `allocator`.
// `gsub_builder` returns an empty string when matching fails, that result is passed on.
gsub_allocator :: proc(
	haystack: string,
	pattern: string,
	replace: string,
	allocator := context.allocator,
) -> string {
	builder := strings.builder_make(0, 256, context.temp_allocator)
	scratch := gsub_builder(&builder, haystack, pattern, replace)

	// the builder memory belongs to the temp allocator, hand out a copy that is
	// owned by the allocator the caller asked for
	result := strings.clone(scratch, allocator)
	return result
}
// rebuilds a pattern into a case insensitive pattern
//
// Inputs:
// - pattern:   pattern to convert
// - cap:       initial capacity of the temporary builder
// - allocator: owns the returned string
//
// Returns the converted pattern, allocated with `allocator`.
pattern_case_insensitive_allocator :: proc(
	pattern: string,
	cap: int = 256,
	allocator := context.allocator,
) -> (res: string) {
	builder := strings.builder_make(0, cap, context.temp_allocator)
	scratch := pattern_case_insensitive_builder(&builder, pattern)

	// the builder memory belongs to the temp allocator, hand out a copy that is
	// owned by the allocator the caller asked for
	res = strings.clone(scratch, allocator)
	return
}
// Returns the text of capture `index`, sliced out of `matcher.haystack`.
// Captures are stored one slot higher than `index`, as slot 0 holds the full match.
// NOTE(review): the bound passed to the check is MAX_CAPTURES - 1 while the checked
// value is already shifted by one - confirm that the last slot is meant to be unreachable.
matcher_capture :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> string #no_bounds_check {
	slot := index + 1
	runtime.bounds_check_error_loc(loc, slot, MAX_CAPTURES - 1)

	found := matcher.captures[slot]
	return matcher.haystack[found.byte_start:found.byte_end]
}
// iteratively match the haystack till it cant find any matches
//
// Returns the matched text (the first capture when the pattern has captures, the
// full match otherwise), the running index of this match and true on success.
// `matcher.iter` is advanced past the match, `matcher.captures` holds positions
// relative to the `iter` that was searched.
matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: bool) {
	if len(matcher.iter) == 0 {
		return
	}

	// the offset belongs to the untouched haystack: once a match was produced `iter`
	// has been advanced already and applying the offset again would skip input
	offset := matcher.offset if matcher.iter_index == 0 else 0

	matcher.captures_length, matcher.err = find_aux(
		matcher.iter,
		matcher.pattern,
		offset,
		false,
		&matcher.captures,
	)

	if matcher.captures_length != 0 && matcher.err == .OK {
		ok = true
		first := matcher.captures_length > 1 ? 1 : 0
		match := matcher.captures[first]

		// output
		res = matcher.iter[match.byte_start:match.byte_end]
		index = matcher.iter_index

		// advance
		matcher.iter_index += 1
		matcher.iter = matcher.iter[match.byte_end:]
	}

	return
}
rem The text matching tests live in tests\core\text\match (see the Makefile target match_test)
echo ---
echo Running core:text/match tests
echo ---
%PATH_TO_ODIN% run text\match %COMMON% -out:test_core_match.exe
"alo123alo", "^12", 0, {} }, + + // from https://riptutorial.com/lua/example/20535/string-find--introduction- + { "137'5 m47ch s0m3 d1g175", "m%d%d", 0, { 6, 9, true } }, + { "stack overflow", "[abc]", 0, { 2, 3, true } }, + { "stack overflow", "[^stack ]", 0, { 6, 7, true } }, + { "hello", "o%d?", 0, { 4, 5, true } }, + { "hello20", "o%d?", 0, { 4, 6, true } }, + { "helllllo", "el+", 0, { 1, 7, true } }, + { "heo", "el+", 0, {} }, + { "helelo", "h.+l", 0, { 0, 5, true } }, + { "helelo", "h.-l", 0, { 0, 3, true } }, + } + + for entry, i in ENTRIES { + matcher := match.matcher_init(entry.s, entry.p, entry.offset) + start, end, ok := match.matcher_find(&matcher) + success := entry.match.ok == ok && start == entry.match.start && end == entry.match.end + + if failed(t, success) { + logf(t, "Find %d failed!\n", i) + logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p) + logf(t, "\tSTART: %d == %d?\n", entry.match.start, start) + logf(t, "\tEND: %d == %d?\n", entry.match.end, end) + logf(t, "\tErr: %v\tLength %d\n", matcher.err, matcher.captures_length) + } + } +} + +@test +test_match :: proc(t: ^testing.T) { + Entry :: struct { + s, p: string, + result: string, // expected start/end + ok: bool, + } + + ENTRIES :: [?]Entry { + // star + { "aaab", ".*b", "aaab", true }, + { "aaa", ".*a", "aaa", true }, + { "b", ".*b", "b", true }, + + // plus + { "aaab", ".+b", "aaab", true }, + { "aaa", ".+a", "aaa", true }, + { "b", ".+b", "", false }, + + // question + { "aaab", ".?b", "ab", true }, + { "aaa", ".?a", "aa", true }, + { "b", ".?b", "b", true }, + + // CLASSES, checking shorted invalid patterns + { "a", "%", "", false }, + + // %a letter (A-Z, a-z) + { "letterS", "%a+", "letterS", true }, + { "Let123", "%a+", "Let", true }, + { "Let123", "%A+", "123", true }, + + // %c control characters (\n, \t, \r) + { "\n", "%c", "\n", true }, + { "\t", "%c", "\t", true }, + { "\t", "%C", "", false }, + { "a", "%C", "a", true }, + + // %d digit characters (0-9) + { "0123", "%d+", 
"0123", true }, + { "abcd", "%D+", "abcd", true }, + { "ab23", "%d+", "23", true }, + + // %l lower characters (a-z) + { "lowerCASE", "%l+", "lower", true }, + { "LOWERcase", "%l+", "case", true }, + { "LOWERcase", "%L+", "LOWER", true }, + + // %p punctionation characters (!, ?, &, ...) + { "!?&", "%p+", "!?&", true }, + { "abc!abc", "%p", "!", true }, + { "!abc!", "%P+", "abc", true }, + + // %s space characters + { " ", "%s", " ", true }, + { "a", "%S", "a", true }, + { "abc ", "%s+", " ", true }, + + // %u upper characters (A-Z) + { "lowerCASE", "%u+", "CASE", true }, + { "LOWERcase", "%u+", "LOWER", true }, + { "LOWERcase", "%U+", "case", true }, + + // %w alpha numeric (A-Z, a-z, 0-9) + { "0123", "%w+", "0123", true }, + { "abcd", "%W+", "", false }, + { "ab23", "%w+", "ab23", true }, + + // %x hexadecimal digits (0x1A, ...) + { "3", "%x", "3", true }, + { "9f", "%x+", "9f", true }, + { "9g", "%x+", "9", true }, + { "9g", "%X+", "g", true }, + + // random tests + { "f123", "%D", "f", true }, + { "f123", "%d", "1", true }, + { "f123", "%d+", "123", true }, + { "foo 123 bar", "%d%d%d", "123", true }, + { "Uppercase", "%u", "U", true }, + { "abcd", "[bc][bc]", "bc", true }, + { "abcd", "[^ad]", "b", true }, + { "123", "[0-9]", "1", true }, + + // end of line + { "testing this", "this$", "this", true }, + { "testing this ", "this$", "", false }, + { "testing this$", "this%$$", "this$", true }, + + // start of line + { "testing this", "^testing", "testing", true }, + { " testing this", "^testing", "", false }, + { "testing this", "^%w+", "testing", true }, + { " testing this", "^%w+", "", false }, + + // balanced string %b + { "testing (this) out", "%b()", "(this)", true }, + { "testing athisz out", "%baz", "athisz", true }, + { "testing _this_ out", "%b__", "_this_", true }, + { "testing _this_ out", "%b_", "", false }, + } + + for entry, i in ENTRIES { + matcher := match.matcher_init(entry.s, entry.p) + result, ok := match.matcher_match(&matcher) + success := 
entry.ok == ok && result == entry.result + + if failed(t, success) { + logf(t, "Match %d failed!\n", i) + logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p) + logf(t, "\tResults: WANTED %s\tGOT %s\n", entry.result, result) + logf(t, "\tErr: %v\tLength %d\n", matcher.err, matcher.captures_length) + } + } +} + +@test +test_captures :: proc(t: ^testing.T) { + Temp :: struct { + pattern: string, + captures: [match.MAX_CAPTURES]match.Match, + } + + // match all captures + compare_captures :: proc(t: ^testing.T, test: ^Temp, haystack: string, comp: []string, loc := #caller_location) { + length, err := match.find_aux(haystack, test.pattern, 0, false, &test.captures) + if failed(t, len(comp) == length) { + logf(t, "Captures Compare Failed -> Lengths %d != %d\n", len(comp), length) + } + + for i in 0.. %s != %s\n", comp[i], text) + } + } + } + + // match to expected results + matches :: proc(t: ^testing.T, test: ^Temp, haystack: string, ok: bool, loc := #caller_location) { + length, err := match.find_aux(haystack, test.pattern, 0, false, &test.captures) + result := length > 0 && err == .OK + + if failed(t, result == ok) { + logf(t, "Capture match failed!\n") + logf(t, "\tErr: %v\n", err) + logf(t, "\tLength: %v\n", length) + } + } + + temp := Temp { pattern = "(one).+" } + compare_captures(t, &temp, " one two", { "one two", "one" }) + compare_captures(t, &temp, "three", {}) + + matches(t, &temp, "one dog", true) + matches(t, &temp, "dog one ", true) + matches(t, &temp, "dog one", false) + + temp.pattern = "^(%a+)" + matches(t, &temp, "one dog", true) + matches(t, &temp, " one dog", false) + + // multiple captures + { + haystack := " 233 hello dolly" + pattern := "%s*(%d+)%s+(%S+)" + captures: [match.MAX_CAPTURES]match.Match + match.find_aux(haystack, pattern, 0, false, &captures) + cap1 := captures[1] + cap2 := captures[2] + text1 := haystack[cap1.byte_start:cap1.byte_end] + text2 := haystack[cap2.byte_start:cap2.byte_end] + expect(t, text1 == "233", "Multi-Capture 
failed at 1") + expect(t, text2 == "hello", "Multi-Capture failed at 2") + } +} + +gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) { + if failed(t, a[index] == b) { + logf(t, "GMATCH %d failed!\n", index) + logf(t, "\t%s != %s\n", a[index], b) + } +} + +@test +test_gmatch :: proc(t: ^testing.T) { + { + matcher := match.matcher_init("testing this out 123", "%w+") + output := [?]string { "testing", "this", "out", "123" } + + for match, index in match.matcher_gmatch(&matcher) { + gmatch_check(t, index, output[:], match) + } + } + + { + matcher := match.matcher_init("#afdde6", "%x%x") + output := [?]string { "af", "dd", "e6" } + + for match, index in match.matcher_gmatch(&matcher) { + gmatch_check(t, index, output[:], match) + } + } + + { + matcher := match.matcher_init("testing outz captures yo outz outtz", "(out)z") + output := [?]string { "out", "out" } + + for match, index in match.matcher_gmatch(&matcher) { + gmatch_check(t, index, output[:], match) + } + } +} + +@test +test_gsub :: proc(t: ^testing.T) { + result := match.gsub("testing123testing", "%d+", " sup ", context.temp_allocator) + expect(t, result == "testing sup testing", "GSUB 0: failed") + result = match.gsub("testing123testing", "%a+", "345", context.temp_allocator) + expect(t, result == "345123345", "GSUB 1: failed") +} + +@test +test_gfind :: proc(t: ^testing.T) { + haystack := "test1 123 test2 123 test3" + pattern := "%w+" + captures: [match.MAX_CAPTURES]match.Match + s := &haystack + output := [?]string { "test1", "123", "test2", "123", "test3" } + index: int + + for word in match.gfind(s, pattern, &captures) { + if failed(t, output[index] == word) { + logf(t, "GFIND %d failed!\n", index) + logf(t, "\t%s != %s\n", output[index], word) + } + index += 1 + } +} + +@test +test_frontier :: proc(t: ^testing.T) { + Temp :: struct { + t: ^testing.T, + index: int, + output: [3]string, + } + + call :: proc(data: rawptr, word: string, haystack: string, captures: []match.Match) { + temp 
:= cast(^Temp) data + + if failed(temp.t, word == temp.output[temp.index]) { + logf(temp.t, "GSUB_WITH %d failed!\n", temp.index) + logf(temp.t, "\t%s != %s\n", temp.output[temp.index], word) + } + + temp.index += 1 + } + + temp := Temp { + t = t, + output = { + "THE", + "QUICK", + "JUMPS", + }, + } + + // https://lua-users.org/wiki/FrontierPattern example taken from here + match.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call) +} + +@test +test_utf8 :: proc(t: ^testing.T) { + matcher := match.matcher_init("恥ず べき恥 フク恥ロ", "%w+") + output := [?]string { "恥ず", "べき恥", "フク恥ロ" } + + for match, index in match.matcher_gmatch(&matcher) { + gmatch_check(t, index, output[:], match) + } +} + +@test +test_case_insensitive :: proc(t: ^testing.T) { + { + pattern := match.pattern_case_insensitive("test", 256, context.temp_allocator) + goal := "[tT][eE][sS][tT]" + + if failed(t, pattern == goal) { + logf(t, "Case Insensitive Pattern doesn't match result\n") + logf(t, "\t%s != %s\n", pattern, goal) + } + } +} + +main :: proc() { + t: testing.T + stream := os.stream_from_handle(os.stdout) + w := io.to_writer(stream) + t.w = w + + test_find(&t) + test_match(&t) + test_captures(&t) + test_gmatch(&t) + test_gsub(&t) + test_gfind(&t) + test_frontier(&t) + test_utf8(&t) + test_case_insensitive(&t) + + fmt.wprintf(w, "%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + if TEST_fail > 0 { + os.exit(1) + } +} \ No newline at end of file