From bd3596f01261ab8f3238553fbfe5616a5441e1c0 Mon Sep 17 00:00:00 2001 From: skytrias Date: Wed, 30 Nov 2022 05:03:36 +0100 Subject: [PATCH 01/18] create lua strlib text package and tests --- core/text/lua/strlib.odin | 725 ++++++++++++++++++++ tests/core/Makefile | 5 +- tests/core/build.bat | 5 + tests/core/text/lua/test_core_text_lua.odin | 325 +++++++++ 4 files changed, 1059 insertions(+), 1 deletion(-) create mode 100644 core/text/lua/strlib.odin create mode 100644 tests/core/text/lua/test_core_text_lua.odin diff --git a/core/text/lua/strlib.odin b/core/text/lua/strlib.odin new file mode 100644 index 000000000..a97ccdc8d --- /dev/null +++ b/core/text/lua/strlib.odin @@ -0,0 +1,725 @@ +package strlib + +import "core:strings" + +MAXCAPTURES :: 32 + +Capture :: struct { + init: int, + len: int, +} + +Match :: struct { + start, end: int, +} + +Error :: enum { + OK, + OOB, + Invalid_Capture_Index, + Invalid_Pattern_Capture, + Unfinished_Capture, +} + +L_ESC :: '%' +CAP_POSITION :: -2 +CAP_UNFINISHED :: -1 +INVALID :: -1 + +MatchState :: struct { + src: string, + pattern: string, + level: int, + capture: [MAXCAPTURES]Capture, +} + +match_class :: proc(c: u8, cl: u8) -> (res: bool) { + switch tolower(cl) { + case 'a': res = isalpha(c) + case 'c': res = iscntrl(c) + case 'd': res = isdigit(c) + case 'g': res = isgraph(c) + case 'l': res = islower(c) + case 'p': res = ispunct(c) + case 's': res = isspace(c) + case 'u': res = isupper(c) + case 'w': res = isalnum(c) + case 'x': res = isxdigit(c) + case: return cl == c + } + + return islower(cl) ? res : !res +} + +isalpha :: proc(c: u8) -> bool { + return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') +} + +isdigit :: proc(c: u8) -> bool { + return '0' <= c && c <= '9' +} + +isalnum :: proc(c: u8) -> bool { + return isalpha(c) || isdigit(c) +} + +iscntrl :: proc(c: u8) -> bool { + return c <= '\007' || (c >= '\010' && c <= '\017') || (c >= '\020' && c <= '\027') || (c >= '\030' && c <= '\037') || c == '\177' +} + +islower :: proc(c: u8) -> bool { + return c >= 'a' && c <= 'z' +} + +isupper :: proc(c: u8) -> bool { + return c >= 'A' && c <= 'Z' +} + +isgraph :: proc(c: u8) -> bool { + return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') +} + +ispunct :: proc(c: u8) -> bool { + return (c >= '{' && c <= '~') || (c == '`') || (c >= '[' && c <= '_') || (c == '@') || (c >= ':' && c <= '?') || (c >= '(' && c <= '/') || (c >= '!' && c <= '\'') +} + +isxdigit :: proc(c: u8) -> bool { + return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') +} + +isspace :: proc(c: u8) -> bool { + return c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r' || c == ' ' +} + +// ascii safe +tolower :: proc(c: u8) -> u8 { + if c >= 65 && c <= 90 { // upper case + return c + 32 + } + + return c +} + +check_capture :: proc(ms: ^MatchState, l: rune) -> (int, Error) { + l := int(l - '1') + + if l < 0 || l >= ms.level || ms.capture[l].len == CAP_UNFINISHED { + return 0, .Invalid_Capture_Index + } + + return l, .OK +} + +capture_to_close :: proc(ms: ^MatchState) -> (int, Error) { + level := ms.level - 1 + + for level >= 0 { + if ms.capture[level].len == CAP_UNFINISHED { + return level, .OK + } + + level -= 1 + } + + return 0, .Invalid_Pattern_Capture +} + +classend :: proc(ms: ^MatchState, p: int) -> (int, Error) { + ch := ms.pattern[p] + p := p + 1 + + switch ch { + case L_ESC: { + // if > 0 { + // fmt.eprintln("ERR classend: not enough pattern length") + // return nil + // } + + return p + 1, .OK + } + + case '[': { + if ms.pattern[p] == '^' { + p += 1 + } + + // TODO double check + for { + ch := ms.pattern[p] + + if ch == L_ESC && p <= len(ms.pattern) { + // skip escapes like '%' + p += 1 + } + + if ms.pattern[p] == ']' { + break + } + + p += 1 + } + + return p + 1, .OK + } + + case: { + return p, .OK + } + } +} + +matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool { + sig := true + p := p + + if ms.pattern[p + 1] == '^' { + p += 1 + sig = false + } + + p += 1 + + // while inside of class range + for p < ec { + ch := ms.pattern[p] + + if ms.pattern[p] == L_ESC { + p += 1 + + if match_class(c, ms.pattern[p]) { + return sig + } + } else if ms.pattern[p + 1] == '-' && p + 2 < len(ms.pattern) { + // e.g. [a-z] check + if ms.pattern[p] <= c && c <= ms.pattern[p + 2] { + return sig + } + + p += 2 + } else if ms.pattern[p] == c { + return sig + } + + p += 1 + } + + return !sig +} + +singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> bool { + if s >= len(ms.src) { + return false + } + + switch ms.pattern[p] { + case '.': return true + case L_ESC: return match_class(ms.src[s], ms.pattern[p + 1]) + case '[': return matchbracketclass(ms, ms.src[s], p, ep - 1) + case: return ms.src[s] == ms.pattern[p] + } +} + +// matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) { +// s_begin := s +// s := s + 1 +// cont := 0 + +// begin := ms.pattern[p] +// end := ms.pattern[p + 1] +// print("BALANCED between", rune(begin), "AND", rune(end)) + +// for s < len(ms.src) { +// ch := ms.src[s] +// print("\t", rune(ch)) + +// if ch == end { +// cont -= 1 +// print("END", cont) + +// if cont == 0 { +// print("BALANCED RET", s + 1, len(ms.src), ms.src[s_begin:s + 1]) +// return s + 1 +// } +// } else if ch == begin { +// cont += 1 +// print("BEGIN", cont) +// } + +// s += 1 +// } + +// print("OUT OF BALANCE", cont) +// // out of balance +// return 0, . +// } + +max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { + i := 0 + for singlematch(ms, s + i, p, ep) { + i += 1 + } + + for i >= 0 { + result := match(ms, s + i, ep + 1) or_return + + if result != INVALID { + // print("SET", result) + return result, .OK + } + + i -= 1 + } + + return INVALID, .OK +} + +min_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { + s := s + + for { + result := match(ms, s, ep + 1) or_return + + if result != INVALID { + return result, .OK + } else if singlematch(ms, s, p, ep) { + s += 1 + } else { + return INVALID, .OK + } + } +} + +start_capture :: proc(ms: ^MatchState, s, p, what: int) -> (res: int, err: Error) { + level := ms.level + + ms.capture[level].init = s + ms.capture[level].len = what + ms.level += 1 + + res = match(ms, s, p) or_return + if res == INVALID { + ms.level -= 1 + } + return +} + +end_capture :: proc(ms: ^MatchState, s, p: int) -> (res: int, err: Error) { + l := capture_to_close(ms) or_return + + // TODO double check, could do string as int index + ms.capture[l].len = s - ms.capture[l].init + + res = match(ms, s, p) or_return + if res == INVALID { + ms.capture[l].len = CAP_UNFINISHED + } + return +} + +match_capture :: proc(ms: ^MatchState, s: int, char: rune) -> (res: int, err: Error) { + index := check_capture(ms, char) or_return + length := ms.capture[index].len + + if len(ms.src) - s >= length { + return s + length, .OK + } + + return INVALID, .OK +} + +match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { + s := s + p := p + + if p == len(ms.pattern) { + return s, .OK + } + + switch ms.pattern[p] { + case '(': { + if ms.pattern[p + 1] == ')' { + s = start_capture(ms, s, p + 2, CAP_POSITION) or_return + } else { + s = start_capture(ms, s, p + 1, CAP_UNFINISHED) or_return + } + } + + case ')': { + s = end_capture(ms, s, p + 1) or_return + } + + case '$': { + if p + 1 != len(ms.pattern) { + return match_default(ms, s, p) + } + + if len(ms.src) != s { + s = INVALID + } + } + + case L_ESC: { + // stop short patterns like "%" only + if p + 1 >= len(ms.pattern) { + err = .OOB + return + } + + switch ms.pattern[p + 1] { + // balanced string + case 'b': { + // res := matchbalance(ms, s, p + 2) + + // if data, ok := res.?; ok { + // // s = data + // // eg after %b() + // // print("SUCCESS") + // return patt_match(ms, s, p + 4) + // } + + } + + // frontier + case 'f': { + // p += 2 + + // if ms.pattern[p] != '[' { + // print("missing '[' after %f in pattern") + // return nil + // } + + // ep := classend(ms, p).? + // previous := 0 if s == 0 else s - 1 + + // if !matchbracketclass(ms, ms.src[previous], p, ep - 1) && + // matchbracketclass(ms, ms.src[s], p, ep) { + // return patt_match(ms, s, ep) + // } + + // return nil + } + + // capture group + case '0'..<'9': { + s = match_capture(ms, s, rune(ms.pattern[p + 1])) or_return + + if s != INVALID { + return match(ms, s, p + 2) + } + } + + case: { + return match_default(ms, s, p) + } + } + } + + case: { + return match_default(ms, s, p) + // print("PATT DEF", rune(ms.src[s]), rune(ms.pattern[p])) + } + } + + return s, .OK +} + +match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { + s := s + ep := classend(ms, p) or_return + // ch := s < len(ms.src) ? rune(ms.src[s]) : 0 + + if !singlematch(ms, s, p, ep) { + epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0 + // print("+++", rune(epc)) + + if epc == '*' || epc == '?' || epc == '-' { + return match(ms, s, ep + 1) + } else { + s = INVALID + } + } else { + epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0 + // print("~~~", ch, rune(epc)) + + switch epc { + case '?': { + result := match(ms, s + 1, ep + 1) or_return + + if result != INVALID { + s = result + } else { + return match(ms, s, ep + 1) + } + } + + case '+': { + s = max_expand(ms, s + 1, p, ep) or_return + } + + case '*': { + s = max_expand(ms, s, p, ep) or_return + } + + case '-': { + s = min_expand(ms, s, p, ep) or_return + } + + case: { + return match(ms, s + 1, ep) + } + } + } + + return s, .OK +} + +push_onecapture :: proc( + ms: ^MatchState, + i: int, + s: int, + e: int, + matches: []Match, +) -> (err: Error) { + if i >= ms.level { + if i == 0 { + matches[0] = { 0, e - s } + } else { + err = .Invalid_Capture_Index + } + } else { + init := ms.capture[i].init + length := ms.capture[i].len + + switch length { + case CAP_UNFINISHED: { + err = .Unfinished_Capture + } + + case CAP_POSITION: { + matches[i] = { init - 1, init - 1 } + } + + case: { + matches[i] = { init, init + length } + } + } + } + + return +} + +push_captures :: proc( + ms: ^MatchState, + s: int, + e: int, + matches: []Match, +) -> (nlevels: int, err: Error) { + nlevels = 1 if ms.level == 0 && s != -1 else ms.level + + for i in 0.. int { + for i in 0.. int { + l1 := len(s1) + l2 := len(s2) + + if l2 == 0 { + return 0 + } else if l2 > l1 { + return -1 + } else { + init := strings.index_byte(s1, s2[0]) + end := init + l2 + + for end <= l1 && init != -1 { + init += 1 + + if s1[init - 1:end] == s2 { + return init - 1 + } else { + next := strings.index_byte(s1[init:], s2[0]) + + if next == -1 { + return -1 + } else { + init = init + next + end = init + l2 + } + } + } + } + + return -1 +} + +// find a pattern with in a haystack with an offset +// allow_memfind will speed up simple searches +find_aux :: proc( + haystack: string, + pattern: string, + offset: int, + allow_memfind: bool, + matches: ^[MAXCAPTURES]Match, +) -> (captures: int, err: Error) { + s := offset + p := 0 + + specials_idx := index_special(pattern) + if allow_memfind && specials_idx == -1 { + if index := lmemfind(haystack[s:], pattern); index != -1 { + matches[0] = { index + s, index + s + len(pattern) } + captures = 1 + return + } else { + return + } + } + + pattern := pattern + anchor: bool + if len(pattern) > 0 && pattern[0] == '^' { + anchor = true + pattern = pattern[1:] + } + + ms := MatchState { + src = haystack, + pattern = pattern, + } + + for { + res := match(&ms, s, p) or_return + + if res != INVALID { + // NOTE(Skytrias): first result is reserved for a full match + matches[0] = { s, res } + // rest are the actual captures + captures = push_captures(&ms, -1, -1, matches[1:]) or_return + captures += 1 + + return + } + + s += 1 + + if !(s < len(ms.src) && !anchor) { + break + } + } + + return +} + +// iterative matching which returns the 0th/1st match +// rest has to be used from captures +gmatch :: proc( + haystack: ^string, + pattern: string, + captures: ^[MAXCAPTURES]Match, +) -> (res: string, ok: bool) { + if len(haystack) > 0 { + length, err := find_aux(haystack^, pattern, 0, false, captures) + + if length != 0 && err == .OK { + ok = true + first := length > 1 ? 1 : 0 + cap := captures[first] + res = haystack[cap.start:cap.end] + haystack^ = haystack[cap.end:] + } + } + + return +} + +// gsub with builder +gsub_builder :: proc( + builder: ^strings.Builder, + haystack: string, + pattern: string, + replace: string, +) -> string { + // find matches + captures: [MAXCAPTURES]Match + haystack := haystack + + for { + length, err := find_aux(haystack, pattern, 0, false, &captures) + + // done + if length == 0 { + break + } + + if err != .OK { + return {} + } + + cap := captures[0] + + // write front till capture + strings.write_string(builder, haystack[:cap.start]) + + // write replacements + strings.write_string(builder, replace) + + // advance string till end + haystack = haystack[cap.end:] + } + + strings.write_string(builder, haystack[:]) + return strings.to_string(builder^) +} + +// uses temp builder to build initial string - then allocates the result +gsub_allocator :: proc( + haystack: string, + pattern: string, + replace: string, + allocator := context.allocator, +) -> string { + builder := strings.builder_make(0, 256, context.temp_allocator) + return gsub_builder(&builder, haystack, pattern, replace) +} + +gsub :: proc { gsub_builder, gsub_allocator } + +// iterative find with first capture only +gfind :: proc( + haystack: ^string, + pattern: string, + captures: ^[MAXCAPTURES]Match, +) -> (res: string, ok: bool) { + if len(haystack) > 0 { + length, err := find_aux(haystack^, pattern, 0, true, captures) + + if length != 0 && err == .OK { + ok = true + cap := captures[0] + res = haystack[cap.start:cap.end] + haystack^ = haystack[cap.end:] + } + } + + return +} \ No newline at end of file diff --git a/tests/core/Makefile b/tests/core/Makefile index 92f12cbe7..8a36f7ea3 100644 --- a/tests/core/Makefile +++ b/tests/core/Makefile @@ -2,7 +2,7 @@ ODIN=../../odin PYTHON=$(shell which python3) all: download_test_assets image_test compress_test strings_test hash_test crypto_test noise_test encoding_test \ - math_test linalg_glsl_math_test filepath_test reflect_test os_exit_test i18n_test c_libc_test + math_test linalg_glsl_math_test filepath_test reflect_test os_exit_test i18n_test lua_strlib_test c_libc_test download_test_assets: $(PYTHON) download_assets.py @@ -49,5 +49,8 @@ os_exit_test: i18n_test: $(ODIN) run text/i18n -out:test_core_i18n +lua_strlib_test: + $(ODIN) run text/lua -out:test_core_lua_strlib + c_libc_test: $(ODIN) run c/libc -out:test_core_libc \ No newline at end of file diff --git a/tests/core/build.bat b/tests/core/build.bat index 94f8e0fd8..66c1839bf 100644 --- a/tests/core/build.bat +++ b/tests/core/build.bat @@ -71,6 +71,11 @@ echo Running core:text/i18n tests echo --- %PATH_TO_ODIN% run text\i18n %COMMON% -out:test_core_i18n.exe +echo --- +echo Running core:text/lua tests +echo --- +%PATH_TO_ODIN% run text\lua %COMMON% -out:test_core_lua_strlib.exe + echo --- echo Running core:slice tests echo --- diff --git a/tests/core/text/lua/test_core_text_lua.odin b/tests/core/text/lua/test_core_text_lua.odin new file mode 100644 index 000000000..63d8b5239 --- /dev/null +++ b/tests/core/text/lua/test_core_text_lua.odin @@ -0,0 +1,325 @@ +package test_strlib + +import lua "core:text/lua" +import "core:testing" +import "core:fmt" +import "core:os" + +TEST_count: int +TEST_fail: int + +when ODIN_TEST { + expect :: testing.expect +} else { + expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) { + TEST_count += 1 + if !condition { + TEST_fail += 1 + fmt.printf("[%v] %v\n", loc, message) + return + } + } +} + +// find correct byte offsets +@test +test_find :: proc(t: ^testing.T) { + Entry :: struct { + s, p: string, + offset: int, + + match: struct { + start, end: int, // expected start/end + ok: bool, + }, + } + + ENTRIES :: [?]Entry { + { "", "", 0, { 0, 0, true } }, + { "alo", "", 0, { 0, 0, true } }, + { "a o a o a o", "a", 0, { 0, 1, true } }, + { "a o a o a o", "a o", 1, { 4, 7, true } }, + { "alo123alo", "12", 0, { 3, 5, true } }, + { "alo123alo", "^12", 0, {} }, + + // from https://riptutorial.com/lua/example/20535/string-find--introduction- + { "137'5 m47ch s0m3 d1g175", "m%d%d", 0, { 6, 9, true } }, + { "stack overflow", "[abc]", 0, { 2, 3, true } }, + { "stack overflow", "[^stack ]", 0, { 6, 7, true } }, + { "hello", "o%d?", 0, { 4, 5, true } }, + { "hello20", "o%d?", 0, { 4, 6, true } }, + { "helllllo", "el+", 0, { 1, 7, true } }, + { "heo", "el+", 0, {} }, + { "helelo", "h.+l", 0, { 0, 5, true } }, + { "helelo", "h.-l", 0, { 0, 3, true } }, + } + + captures: [lua.MAXCAPTURES]lua.Match + for entry in ENTRIES { + captures[0] = {} + length, err := lua.find_aux(entry.s, entry.p, entry.offset, true, &captures) + cap := captures[0] + ok := length > 0 && err == .OK + success := entry.match.ok == ok && entry.match.start == cap.start && entry.match.end == cap.end + + if !success { + fmt.eprintf("Find failed for: haystack = %s\tpattern = %s\n", entry.s, entry.p) + } + + expect(t, entry.match.start == cap.start, "match start didnt match") + expect(t, entry.match.end == cap.end, "match end didnt match",) + expect(t, entry.match.ok == ok, "find result didnt match") + } +} + +@test +test_match :: proc(t: ^testing.T) { + Entry :: struct { + s, p: string, + result: string, // expected start/end + ok: bool, + } + + ENTRIES :: [?]Entry { + // star + { "aaab", ".*b", "aaab", true }, + { "aaa", ".*a", "aaa", true }, + { "b", ".*b", "b", true }, + + // plus + { "aaab", ".+b", "aaab", true }, + { "aaa", ".+a", "aaa", true }, + { "b", ".+b", "", false }, + + // question + { "aaab", ".?b", "ab", true }, + { "aaa", ".?a", "aa", true }, + { "b", ".?b", "b", true }, + + // CLASSES, checking shorted invalid patterns + { "a", "%", "", false }, + + // %a letter (A-Z, a-z) + { "letterS", "%a+", "letterS", true }, + { "Let123", "%a+", "Let", true }, + { "Let123", "%A+", "123", true }, + + // %c control characters (\n, \t, \r) + { "\n", "%c", "\n", true }, + { "\t", "%c", "\t", true }, + { "\t", "%C", "", false }, + { "a", "%C", "a", true }, + + // %d digit characters (0-9) + { "0123", "%d+", "0123", true }, + { "abcd", "%D+", "abcd", true }, + { "ab23", "%d+", "23", true }, + + // %l lower characters (a-z) + { "lowerCASE", "%l+", "lower", true }, + { "LOWERcase", "%l+", "case", true }, + { "LOWERcase", "%L+", "LOWER", true }, + + // %p punctionation characters (!, ?, &, ...) + { "!?&", "%p+", "!?&", true }, + { "abc!abc", "%p", "!", true }, + { "!abc!", "%P+", "abc", true }, + + // %s space characters + { " ", "%s", " ", true }, + { "a", "%S", "a", true }, + { "abc ", "%s+", " ", true }, + + // %u upper characters (A-Z) + { "lowerCASE", "%u+", "CASE", true }, + { "LOWERcase", "%u+", "LOWER", true }, + { "LOWERcase", "%U+", "case", true }, + + // %w alpha numeric (A-Z, a-z, 0-9) + { "0123", "%w+", "0123", true }, + { "abcd", "%W+", "", false }, + { "ab23", "%w+", "ab23", true }, + + // %x hexadecimal digits (0x1A, ...) + { "3", "%x", "3", true }, + { "9f", "%x+", "9f", true }, + { "9g", "%x+", "9", true }, + { "9g", "%X+", "g", true }, + + // random tests + { "f123", "%D", "f", true }, + { "f123", "%d", "1", true }, + { "f123", "%d+", "123", true }, + { "foo 123 bar", "%d%d%d", "123", true }, + { "Uppercase", "%u", "U", true }, + { "abcd", "[bc][bc]", "bc", true }, + { "abcd", "[^ad]", "b", true }, + { "123", "[0-9]", "1", true }, + + // end of line + { "testing this", "this$", "this", true }, + { "testing this ", "this$", "", false }, + { "testing this$", "this%$$", "this$", true }, + + // start of line + { "testing this", "^testing", "testing", true }, + { " testing this", "^testing", "", false }, + { "testing this", "^%w+", "testing", true }, + { " testing this", "^%w+", "", false }, + } + + captures: [lua.MAXCAPTURES]lua.Match + for entry, i in ENTRIES { + captures[0] = {} + length, err := lua.find_aux(entry.s, entry.p, 0, false, &captures) + ok := length > 0 && err == .OK + result := entry.s[captures[0].start:captures[0].end] + success := entry.ok == ok && result == entry.result + + if !success { + fmt.eprintf("Match failed for: haystack = %s\tpattern = %s\n", entry.s, entry.p) + fmt.eprintf("Match invalid result! |WANTED:| %s |GOT:| %s\n", entry.result, result) + } + + expect(t, entry.ok == ok, "find result didnt match") + expect(t, result == entry.result, "entry result didnt match") + } +} + +@test +test_captures :: proc(t: ^testing.T) { + Temp :: struct { + pattern: string, + captures: [lua.MAXCAPTURES]lua.Match, + } + + // match all captures + compare_captures :: proc(t: ^testing.T, test: ^Temp, haystack: string, comp: []string, loc := #caller_location) { + length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures) + expect(t, len(comp) == length, "didnt match input comparison strings", loc) + + for i in 0.. 0 && err == .OK + expect(t, result == ok, "result didnt eq", loc) + } + + temp := Temp { pattern = "(one).+" } + compare_captures(t, &temp, " one two", { "one two", "one" }) + compare_captures(t, &temp, "three", {}) + + matches(t, &temp, "one dog", true) + matches(t, &temp, "dog one ", true) + matches(t, &temp, "dog one", false) + + temp.pattern = "^(%a+)" + matches(t, &temp, "one dog", true) + matches(t, &temp, " one dog", false) + + // multiple captures + { + haystack := " 233 hello dolly" + pattern := "%s*(%d+)%s+(%S+)" + captures: [lua.MAXCAPTURES]lua.Match + lua.find_aux(haystack, pattern, 0, false, &captures) + cap1 := captures[1] + cap2 := captures[2] + text1 := haystack[cap1.start:cap1.end] + text2 := haystack[cap2.start:cap2.end] + expect(t, text1 == "233", "Multi-Capture failed at 1") + expect(t, text2 == "hello", "Multi-Capture failed at 2") + } +} + +@test +test_gmatch :: proc(t: ^testing.T) { + { + haystack := "testing this out 123" + pattern := "%w+" + s := &haystack + captures: [lua.MAXCAPTURES]lua.Match + output := [?]string { "testing", "this", "out", "123" } + index: int + + for match in lua.gmatch(s, pattern, &captures) { + expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match)) + index += 1 + } + } + + { + haystack := "#afdde6" + pattern := "%x%x" + s := &haystack + captures: [lua.MAXCAPTURES]lua.Match + output := [?]string { "af", "dd", "e6" } + index: int + + for match in lua.gmatch(s, pattern, &captures) { + expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match)) + index += 1 + } + } + + { + haystack := "testing outz captures yo outz outtz" + pattern := "(out)z" + s := &haystack + captures: [lua.MAXCAPTURES]lua.Match + output := [?]string { "out", "out" } + index: int + + for match in lua.gmatch(s, pattern, &captures) { + expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match)) + index += 1 + } + } +} + +@test +test_gsub :: proc(t: ^testing.T) { + result := lua.gsub("testing123testing", "%d+", " sup ", context.temp_allocator) + expect(t, result == "testing sup testing", "GSUB 0: failed") + result = lua.gsub("testing123testing", "%a+", "345", context.temp_allocator) + expect(t, result == "345123345", "GSUB 1: failed") +} + +@test +test_gfind :: proc(t: ^testing.T) { + { + haystack := "test1 123 test2 123 test3" + pattern := "%w+" + captures: [lua.MAXCAPTURES]lua.Match + s := &haystack + output := [?]string { "test1", "123", "test2", "123", "test3" } + index: int + + for word in lua.gfind(s, pattern, &captures) { + expect(t, output[index] == word, fmt.tprintf("GFIND %d failed: %s != %s\n", index, output[index], word)) + index += 1 + } + } +} + +main :: proc() { + t: testing.T + test_find(&t) + test_match(&t) + test_captures(&t) + test_gmatch(&t) + test_gsub(&t) + test_gfind(&t) + + fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + if TEST_fail > 0 { + os.exit(1) + } +} \ No newline at end of file From 70bd220f3477b31c4e34b9cc08ee2c975194d26a Mon Sep 17 00:00:00 2001 From: skytrias Date: Wed, 30 Nov 2022 06:20:04 +0100 Subject: [PATCH 02/18] balanced string, frontier pattern, gsub_with and their tests added --- core/text/lua/strlib.odin | 158 ++++++++++++-------- tests/core/text/lua/test_core_text_lua.odin | 59 ++++++-- 2 files changed, 140 insertions(+), 77 deletions(-) diff --git a/core/text/lua/strlib.odin b/core/text/lua/strlib.odin index a97ccdc8d..47ca73d24 100644 --- a/core/text/lua/strlib.odin +++ b/core/text/lua/strlib.odin @@ -19,6 +19,7 @@ Error :: enum { Invalid_Capture_Index, Invalid_Pattern_Capture, Unfinished_Capture, + Malformed_Pattern, } L_ESC :: '%' @@ -143,20 +144,22 @@ classend :: proc(ms: ^MatchState, p: int) -> (int, Error) { p += 1 } - // TODO double check - for { - ch := ms.pattern[p] + for ms.pattern[p] != ']' { + // if p == len(ms.pattern) { + // return 0, .Malformed_Pattern + // } - if ch == L_ESC && p <= len(ms.pattern) { + ch := ms.pattern[p] + p += 1 + + if p < len(ms.pattern) && ch == L_ESC { // skip escapes like '%' p += 1 } - if ms.pattern[p] == ']' { - break - } - - p += 1 + // if ms.pattern[p] == ']' { + // break + // } } return p + 1, .OK @@ -183,13 +186,14 @@ matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool { for p < ec { ch := ms.pattern[p] - if ms.pattern[p] == L_ESC { + // e.g. %a + if ms.pattern[p] == L_ESC { p += 1 if match_class(c, ms.pattern[p]) { return sig } - } else if ms.pattern[p + 1] == '-' && p + 2 < len(ms.pattern) { + } else if p + 2 < len(ms.pattern) && ms.pattern[p + 1] == '-' { // e.g. [a-z] check if ms.pattern[p] <= c && c <= ms.pattern[p + 2] { return sig @@ -219,39 +223,40 @@ singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> bool { } } -// matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) { -// s_begin := s -// s := s + 1 -// cont := 0 +matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) { + if p >= len(ms.pattern) - 1 { + return INVALID, .Invalid_Pattern_Capture + } -// begin := ms.pattern[p] -// end := ms.pattern[p + 1] -// print("BALANCED between", rune(begin), "AND", rune(end)) + // skip until the src and pattern match + if ms.src[s] != ms.pattern[p] { + return INVALID, .OK + } -// for s < len(ms.src) { -// ch := ms.src[s] -// print("\t", rune(ch)) + s_begin := s + cont := 1 + s := s + 1 + begin := ms.pattern[p] + end := ms.pattern[p + 1] -// if ch == end { -// cont -= 1 -// print("END", cont) + for s < len(ms.src) { + ch := ms.src[s] -// if cont == 0 { -// print("BALANCED RET", s + 1, len(ms.src), ms.src[s_begin:s + 1]) -// return s + 1 -// } -// } else if ch == begin { -// cont += 1 -// print("BEGIN", cont) -// } + if ch == end { + cont -= 1 -// s += 1 -// } + if cont == 0 { + return s + 1, .OK + } + } else if ch == begin { + cont += 1 + } -// print("OUT OF BALANCE", cont) -// // out of balance -// return 0, . -// } + s += 1 + } + + return INVALID, .OK +} max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { i := 0 @@ -263,7 +268,6 @@ max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { result := match(ms, s + i, ep + 1) or_return if result != INVALID { - // print("SET", result) return result, .OK } @@ -368,35 +372,34 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { switch ms.pattern[p + 1] { // balanced string case 'b': { - // res := matchbalance(ms, s, p + 2) - - // if data, ok := res.?; ok { - // // s = data - // // eg after %b() - // // print("SUCCESS") - // return patt_match(ms, s, p + 4) - // } + s = matchbalance(ms, s, p + 2) or_return + if s != INVALID { + // eg after %b() + return match(ms, s, p + 4) + } } // frontier case 'f': { - // p += 2 + p += 2 - // if ms.pattern[p] != '[' { - // print("missing '[' after %f in pattern") - // return nil - // } + if ms.pattern[p] != '[' { + return INVALID, .Invalid_Pattern_Capture + } - // ep := classend(ms, p).? - // previous := 0 if s == 0 else s - 1 + ep := classend(ms, p) or_return + previous := s == 0 ? '\x00' : ms.src[s - 1] + // allow last character to count too + current := s >= len(ms.src) ? '\x00' : ms.src[s] - // if !matchbracketclass(ms, ms.src[previous], p, ep - 1) && - // matchbracketclass(ms, ms.src[s], p, ep) { - // return patt_match(ms, s, ep) - // } + // fmt.eprintln("TRY", rune(ms.src[s]), ep) + if !matchbracketclass(ms, previous, p, ep - 1) && + matchbracketclass(ms, current, p, ep - 1) { + return match(ms, s, ep) + } - // return nil + s = INVALID } // capture group @@ -416,7 +419,6 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { case: { return match_default(ms, s, p) - // print("PATT DEF", rune(ms.src[s]), rune(ms.pattern[p])) } } @@ -426,11 +428,9 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { s := s ep := classend(ms, p) or_return - // ch := s < len(ms.src) ? rune(ms.src[s]) : 0 if !singlematch(ms, s, p, ep) { epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0 - // print("+++", rune(epc)) if epc == '*' || epc == '?' || epc == '-' { return match(ms, s, ep + 1) @@ -439,7 +439,6 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { } } else { epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0 - // print("~~~", ch, rune(epc)) switch epc { case '?': { @@ -652,7 +651,7 @@ gmatch :: proc( return } -// gsub with builder +// gsub with builder, replace patterns found with the replace content gsub_builder :: proc( builder: ^strings.Builder, haystack: string, @@ -702,9 +701,38 @@ gsub_allocator :: proc( return gsub_builder(&builder, haystack, pattern, replace) } +// call a procedure on every match in the haystack +gsub_with :: proc( + haystack: string, + pattern: string, + data: rawptr, + call: proc(data: rawptr, word: string), +) { + // find matches + captures: [MAXCAPTURES]Match + haystack := haystack + + for { + length, err := find_aux(haystack, pattern, 0, false, &captures) + + // done + if length == 0 || err != .OK { + break + } + + cap := captures[0] + + word := haystack[cap.start:cap.end] + call(data, word) + + // advance string till end + haystack = haystack[cap.end:] + } +} + gsub :: proc { gsub_builder, gsub_allocator } -// iterative find with first capture only +// iterative find with zeroth capture only gfind :: proc( haystack: ^string, pattern: string, diff --git a/tests/core/text/lua/test_core_text_lua.odin b/tests/core/text/lua/test_core_text_lua.odin index 63d8b5239..832ebe2d9 100644 --- a/tests/core/text/lua/test_core_text_lua.odin +++ b/tests/core/text/lua/test_core_text_lua.odin @@ -15,7 +15,7 @@ when ODIN_TEST { TEST_count += 1 if !condition { TEST_fail += 1 - fmt.printf("[%v] %v\n", loc, message) + fmt.printf("%v %v\n", loc, message) return } } @@ -166,6 +166,12 @@ test_match :: proc(t: ^testing.T) { { " testing this", "^testing", "", false }, { "testing this", "^%w+", "testing", true }, { " testing this", "^%w+", "", false }, + + // balanced string %b + { "testing (this) out", "%b()", "(this)", true }, + { "testing athisz out", "%baz", "athisz", true }, + { "testing _this_ out", "%b__", "_this_", true }, + { "testing _this_ out", "%b_", "", false }, } captures: [lua.MAXCAPTURES]lua.Match @@ -294,21 +300,49 @@ test_gsub :: proc(t: ^testing.T) { @test test_gfind :: proc(t: ^testing.T) { - { - haystack := "test1 123 test2 123 test3" - pattern := "%w+" - captures: [lua.MAXCAPTURES]lua.Match - s := &haystack - output := [?]string { "test1", "123", "test2", "123", "test3" } - index: int + haystack := "test1 123 test2 123 test3" + pattern := "%w+" + captures: [lua.MAXCAPTURES]lua.Match + s := &haystack + output := [?]string { "test1", "123", "test2", "123", "test3" } + index: int - for word in lua.gfind(s, pattern, &captures) { - expect(t, output[index] == word, fmt.tprintf("GFIND %d failed: %s != %s\n", index, output[index], word)) - index += 1 - } + for word in lua.gfind(s, pattern, &captures) { + expect(t, output[index] == word, fmt.tprintf("GFIND %d failed: %s != %s\n", index, output[index], word)) + index += 1 } } +test_frontier :: proc(t: ^testing.T) { + Temp :: struct { + t: ^testing.T, + index: int, + output: [3]string, + } + + call :: proc(data: rawptr, word: string) { + temp := cast(^Temp) data + expect( + temp.t, + word == temp.output[temp.index], + fmt.tprintf("frontier temp didnt match: %s != %s\n", word, temp.output[temp.index]), + ) + temp.index += 1 + } + + temp := Temp { + t = t, + output = { + "THE", + "QUICK", + "JUMPS", + }, + } + + // https://lua-users.org/wiki/FrontierPattern example taken from here + lua.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call) +} + main :: proc() { t: testing.T test_find(&t) @@ -317,6 +351,7 @@ main :: proc() { test_gmatch(&t) test_gsub(&t) test_gfind(&t) + test_frontier(&t) fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) if TEST_fail > 0 { From 3f4bbbec29e663aab98ade2d0448456138c43ac8 Mon Sep 17 00:00:00 2001 From: skytrias Date: Thu, 1 Dec 2022 04:20:50 +0100 Subject: [PATCH 03/18] add proper unicode walking --- core/text/lua/strlib.odin | 329 +++++++++++++------- tests/core/text/lua/test_core_text_lua.odin | 142 ++++++--- 2 files changed, 309 insertions(+), 162 deletions(-) diff --git a/core/text/lua/strlib.odin b/core/text/lua/strlib.odin index 47ca73d24..2d4543f75 100644 --- a/core/text/lua/strlib.odin +++ b/core/text/lua/strlib.odin @@ -1,5 +1,7 @@ package strlib +import "core:unicode" +import "core:unicode/utf8" import "core:strings" MAXCAPTURES :: 32 @@ -10,7 +12,7 @@ Capture :: struct { } Match :: struct { - start, end: int, + byte_start, byte_end: int, } Error :: enum { @@ -20,6 +22,7 @@ Error :: enum { Invalid_Pattern_Capture, Unfinished_Capture, Malformed_Pattern, + Rune_Error, } L_ESC :: '%' @@ -34,8 +37,8 @@ MatchState :: struct { capture: [MAXCAPTURES]Capture, } -match_class :: proc(c: u8, cl: u8) -> (res: bool) { - switch tolower(cl) { +match_class :: proc(c: rune, cl: rune) -> (res: bool) { + switch unicode.to_lower(cl) { case 'a': res = isalpha(c) case 'c': res = iscntrl(c) case 'd': res = isdigit(c) @@ -52,53 +55,92 @@ match_class :: proc(c: u8, cl: u8) -> (res: bool) { return islower(cl) ? res : !res } -isalpha :: proc(c: u8) -> bool { - return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') +isalpha :: proc(c: rune) -> bool { + return unicode.is_alpha(c) } -isdigit :: proc(c: u8) -> bool { - return '0' <= c && c <= '9' +isdigit :: proc(c: rune) -> bool { + return unicode.is_digit(c) } -isalnum :: proc(c: u8) -> bool { - return isalpha(c) || isdigit(c) +isalnum :: proc(c: rune) -> bool { + return unicode.is_alpha(c) || unicode.is_digit(c) } -iscntrl :: proc(c: u8) -> bool { - return c <= '\007' || (c >= '\010' && c <= '\017') || (c >= '\020' && c <= '\027') || (c >= '\030' && c <= '\037') || c == '\177' +iscntrl :: proc(c: rune) -> bool { + return unicode.is_control(c) } -islower :: proc(c: u8) -> bool { - return c >= 'a' && c <= 'z' +islower :: proc(c: rune) -> bool { + return unicode.is_lower(c) } -isupper :: proc(c: u8) -> bool { - return c >= 'A' && c <= 'Z' +isupper :: proc(c: rune) -> bool { + return unicode.is_upper(c) } -isgraph :: proc(c: u8) -> bool { - return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') +isgraph :: proc(c: rune) -> bool { + return unicode.is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') } -ispunct :: proc(c: u8) -> bool { - return (c >= '{' && c <= '~') || (c == '`') || (c >= '[' && c <= '_') || (c == '@') || (c >= ':' && c <= '?') || (c >= '(' && c <= '/') || (c >= '!' && c <= '\'') +ispunct :: proc(c: rune) -> bool { + return unicode.is_punct(c) } -isxdigit :: proc(c: u8) -> bool { - return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') +isxdigit :: proc(c: rune) -> bool { + return unicode.is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') } -isspace :: proc(c: u8) -> bool { - return c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r' || c == ' ' +isspace :: proc(c: rune) -> bool { + return unicode.is_space(c) } -// ascii safe -tolower :: proc(c: u8) -> u8 { - if c >= 65 && c <= 90 { // upper case - return c + 32 +utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) { + c, size = utf8.decode_rune_in_string(bytes) + + if c == utf8.RUNE_ERROR { + err = .Rune_Error } - return c + return +} + +utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) { + size: int + c, size = utf8.decode_rune_in_string(bytes[index^:]) + + if c == utf8.RUNE_ERROR { + err = .Rune_Error + } + + index^ += size + return +} + +// continuation byte? +is_cont :: proc(b: byte) -> bool { + return b & 0xc0 == 0x80 +} + +utf8_prev :: proc(bytes: string, a, b: int) -> int { + b := b + + for a < b && is_cont(bytes[b - 1]) { + b -= 1 + } + + return a < b ? b - 1 : a +} + +utf8_next :: proc(bytes: string, a: int) -> int { + a := a + b := len(bytes) + + for a < b - 1 && is_cont(bytes[a + 1]) { + a += 1 + } + + return a < b ? a + 1 : b } check_capture :: proc(ms: ^MatchState, l: rune) -> (int, Error) { @@ -125,54 +167,52 @@ capture_to_close :: proc(ms: ^MatchState) -> (int, Error) { return 0, .Invalid_Pattern_Capture } -classend :: proc(ms: ^MatchState, p: int) -> (int, Error) { - ch := ms.pattern[p] - p := p + 1 +classend :: proc(ms: ^MatchState, p: int) -> (step: int, err: Error) { + step = p + ch := utf8_advance(ms.pattern, &step) or_return switch ch { case L_ESC: { - // if > 0 { - // fmt.eprintln("ERR classend: not enough pattern length") - // return nil - // } + if step == len(ms.pattern) { + err = .Malformed_Pattern + return + } - return p + 1, .OK + utf8_advance(ms.pattern, &step) or_return } case '[': { - if ms.pattern[p] == '^' { - p += 1 + // fine with step by 1 + if ms.pattern[step] == '^' { + step += 1 } - for ms.pattern[p] != ']' { - // if p == len(ms.pattern) { - // return 0, .Malformed_Pattern - // } - - ch := ms.pattern[p] - p += 1 - - if p < len(ms.pattern) && ch == L_ESC { - // skip escapes like '%' - p += 1 + // run till end is reached + for ms.pattern[step] != ']' { + if step == len(ms.pattern) { + err = .Malformed_Pattern + return } - // if ms.pattern[p] == ']' { - // break - // } + // dont care about utf8 here + step += 1 + + if step < len(ms.pattern) && ms.pattern[step] == L_ESC { + // skip escapes like '%' + step += 1 + } } - return p + 1, .OK - } - - case: { - return p, .OK + // advance last time + step += 1 } } + + return } -matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool { - sig := true +matchbracketclass :: proc(ms: ^MatchState, c: rune, p, ec: int) -> (sig: bool, err: Error) { + sig = true p := p if ms.pattern[p + 1] == '^' { @@ -180,98 +220,127 @@ matchbracketclass :: proc(ms: ^MatchState, c: u8, p, ec: int) -> bool { sig = false } - p += 1 - // while inside of class range for p < ec { - ch := ms.pattern[p] + char := utf8_advance(ms.pattern, &p) or_return // e.g. %a - if ms.pattern[p] == L_ESC { - p += 1 + if char == L_ESC { + next := utf8_advance(ms.pattern, &p) or_return - if match_class(c, ms.pattern[p]) { - return sig - } - } else if p + 2 < len(ms.pattern) && ms.pattern[p + 1] == '-' { - // e.g. [a-z] check - if ms.pattern[p] <= c && c <= ms.pattern[p + 2] { - return sig + if match_class(c, next) { + return } + } else { + next, next_size := utf8_peek(ms.pattern[p:]) or_return - p += 2 - } else if ms.pattern[p] == c { - return sig + // TODO test case for [a-???] where ??? is missing + if next == '-' && p + next_size < len(ms.pattern) { + // advance 2 codepoints + p += next_size + last := utf8_advance(ms.pattern, &p) or_return + + if char <= c && c <= last { + return + } + } else if char == c { + return + } } - - p += 1 } - return !sig + sig = !sig + return } -singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> bool { +singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> (matched: bool, schar_size: int, err: Error) { if s >= len(ms.src) { - return false + return } - switch ms.pattern[p] { - case '.': return true - case L_ESC: return match_class(ms.src[s], ms.pattern[p + 1]) - case '[': return matchbracketclass(ms, ms.src[s], p, ep - 1) - case: return ms.src[s] == ms.pattern[p] + pchar, psize := utf8_peek(ms.pattern[p:]) or_return + schar, ssize := utf8_peek(ms.src[s:]) or_return + schar_size = ssize + + switch pchar { + case '.': matched = true + case L_ESC: { + pchar_next, _ := utf8_peek(ms.pattern[p + psize:]) or_return + matched = match_class(schar, pchar_next) + } + case '[': { + matched = matchbracketclass(ms, schar, p, ep - 1) or_return + } + case: { + matched = schar == pchar + } } + + return } -matchbalance :: proc(ms: ^MatchState, s, p: int) -> (int, Error) { +matchbalance :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { if p >= len(ms.pattern) - 1 { return INVALID, .Invalid_Pattern_Capture } + schar, ssize := utf8_peek(ms.src[s:]) or_return + pchar, psize := utf8_peek(ms.pattern[p:]) or_return + // skip until the src and pattern match - if ms.src[s] != ms.pattern[p] { + if schar != pchar { return INVALID, .OK } s_begin := s cont := 1 - s := s + 1 - begin := ms.pattern[p] - end := ms.pattern[p + 1] + s := s + ssize + begin := pchar + end, _ := utf8_peek(ms.pattern[p + psize:]) or_return for s < len(ms.src) { - ch := ms.src[s] + ch := utf8_advance(ms.src, &s) or_return if ch == end { cont -= 1 if cont == 0 { - return s + 1, .OK + return s, .OK } } else if ch == begin { cont += 1 } - - s += 1 } return INVALID, .OK } max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { - i := 0 - for singlematch(ms, s + i, p, ep) { - i += 1 + m := s + + // count up matches + for { + matched, size := singlematch(ms, m, p, ep) or_return + + if !matched { + break + } + + m += size } - for i >= 0 { - result := match(ms, s + i, ep + 1) or_return + for s <= m { + result := match(ms, m, ep + 1) or_return if result != INVALID { return result, .OK } - i -= 1 + if s == m { + break + } + + m = utf8_prev(ms.src, s, m) } return INVALID, .OK @@ -285,10 +354,15 @@ min_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { if result != INVALID { return result, .OK - } else if singlematch(ms, s, p, ep) { - s += 1 } else { - return INVALID, .OK + // TODO receive next step maybe? + matched, rune_size := singlematch(ms, s, p, ep) or_return + + if matched { + s += rune_size + } else { + return INVALID, .OK + } } } } @@ -339,7 +413,9 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { return s, .OK } - switch ms.pattern[p] { + // NOTE we can walk by ascii steps if we know the characters are ascii + char, _ := utf8_peek(ms.pattern[p:]) or_return + switch char { case '(': { if ms.pattern[p + 1] == ')' { s = start_capture(ms, s, p + 2, CAP_POSITION) or_return @@ -389,13 +465,23 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { } ep := classend(ms, p) or_return - previous := s == 0 ? '\x00' : ms.src[s - 1] - // allow last character to count too - current := s >= len(ms.src) ? '\x00' : ms.src[s] + previous, current: rune - // fmt.eprintln("TRY", rune(ms.src[s]), ep) - if !matchbracketclass(ms, previous, p, ep - 1) && - matchbracketclass(ms, current, p, ep - 1) { + // get previous + if s != 0 { + temp := utf8_prev(ms.src, 0, s) + previous, _ = utf8_peek(ms.src[temp:]) or_return + } + + // get current + if s != len(ms.src) { + current, _ = utf8_peek(ms.src[s:]) or_return + } + + m1 := matchbracketclass(ms, previous, p, ep - 1) or_return + m2 := matchbracketclass(ms, current, p, ep - 1) or_return + + if !m1 && m2 { return match(ms, s, ep) } @@ -428,8 +514,9 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { s := s ep := classend(ms, p) or_return + single_matched, ssize := singlematch(ms, s, p, ep) or_return - if !singlematch(ms, s, p, ep) { + if !single_matched { epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0 if epc == '*' || epc == '?' || epc == '-' { @@ -442,7 +529,7 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { switch epc { case '?': { - result := match(ms, s + 1, ep + 1) or_return + result := match(ms, s + ssize, ep + 1) or_return if result != INVALID { s = result @@ -452,7 +539,7 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { } case '+': { - s = max_expand(ms, s + 1, p, ep) or_return + s = max_expand(ms, s + ssize, p, ep) or_return } case '*': { @@ -464,7 +551,7 @@ match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { } case: { - return match(ms, s + 1, ep) + return match(ms, s + ssize, ep) } } } @@ -643,8 +730,8 @@ gmatch :: proc( ok = true first := length > 1 ? 1 : 0 cap := captures[first] - res = haystack[cap.start:cap.end] - haystack^ = haystack[cap.end:] + res = haystack[cap.byte_start:cap.byte_end] + haystack^ = haystack[cap.byte_end:] } } @@ -677,13 +764,13 @@ gsub_builder :: proc( cap := captures[0] // write front till capture - strings.write_string(builder, haystack[:cap.start]) + strings.write_string(builder, haystack[:cap.byte_start]) // write replacements strings.write_string(builder, replace) // advance string till end - haystack = haystack[cap.end:] + haystack = haystack[cap.byte_end:] } strings.write_string(builder, haystack[:]) @@ -722,11 +809,11 @@ gsub_with :: proc( cap := captures[0] - word := haystack[cap.start:cap.end] + word := haystack[cap.byte_start:cap.byte_end] call(data, word) // advance string till end - haystack = haystack[cap.end:] + haystack = haystack[cap.byte_end:] } } @@ -744,8 +831,8 @@ gfind :: proc( if length != 0 && err == .OK { ok = true cap := captures[0] - res = haystack[cap.start:cap.end] - haystack^ = haystack[cap.end:] + res = haystack[cap.byte_start:cap.byte_end] + haystack^ = haystack[cap.byte_end:] } } diff --git a/tests/core/text/lua/test_core_text_lua.odin b/tests/core/text/lua/test_core_text_lua.odin index 832ebe2d9..630631fc2 100644 --- a/tests/core/text/lua/test_core_text_lua.odin +++ b/tests/core/text/lua/test_core_text_lua.odin @@ -4,21 +4,28 @@ import lua "core:text/lua" import "core:testing" import "core:fmt" import "core:os" +import "core:io" TEST_count: int TEST_fail: int -when ODIN_TEST { - expect :: testing.expect -} else { - expect :: proc(t: ^testing.T, condition: bool, message: string, loc := #caller_location) { - TEST_count += 1 - if !condition { - TEST_fail += 1 - fmt.printf("%v %v\n", loc, message) - return - } +// inline expect with custom props +failed :: proc(t: ^testing.T, ok: bool, loc := #caller_location) -> bool { + TEST_count += 1 + + if !ok { + fmt.wprintf(t.w, "%v: ", loc) + t.error_count += 1 + TEST_fail += 1 } + + return !ok +} + +expect :: testing.expect + +logf :: proc(t: ^testing.T, format: string, args: ..any) { + fmt.wprintf(t.w, format, ..args) } // find correct byte offsets @@ -55,20 +62,20 @@ test_find :: proc(t: ^testing.T) { } captures: [lua.MAXCAPTURES]lua.Match - for entry in ENTRIES { + for entry, i in ENTRIES { captures[0] = {} length, err := lua.find_aux(entry.s, entry.p, entry.offset, true, &captures) cap := captures[0] ok := length > 0 && err == .OK - success := entry.match.ok == ok && entry.match.start == cap.start && entry.match.end == cap.end + success := entry.match.ok == ok && entry.match.start == cap.byte_start && entry.match.end == cap.byte_end - if !success { - fmt.eprintf("Find failed for: haystack = %s\tpattern = %s\n", entry.s, entry.p) + if failed(t, success) { + logf(t, "Find %d failed!\n", i) + logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p) + logf(t, "\tSTART: %d == %d?\n", entry.match.start, cap.byte_start) + logf(t, "\tEND: %d == %d?\n", entry.match.end, cap.byte_end) + logf(t, "\tErr: %v\tLength %d\n", err, length) } - - expect(t, entry.match.start == cap.start, "match start didnt match") - expect(t, entry.match.end == cap.end, "match end didnt match",) - expect(t, entry.match.ok == ok, "find result didnt match") } } @@ -179,16 +186,15 @@ test_match :: proc(t: ^testing.T) { captures[0] = {} length, err := lua.find_aux(entry.s, entry.p, 0, false, &captures) ok := length > 0 && err == .OK - result := entry.s[captures[0].start:captures[0].end] + result := entry.s[captures[0].byte_start:captures[0].byte_end] success := entry.ok == ok && result == entry.result - if !success { - fmt.eprintf("Match failed for: haystack = %s\tpattern = %s\n", entry.s, entry.p) - fmt.eprintf("Match invalid result! |WANTED:| %s |GOT:| %s\n", entry.result, result) + if failed(t, success) { + logf(t, "Match %d failed!\n", i) + logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p) + logf(t, "\tResults: WANTED %s\tGOT %s\n", entry.result, result) + logf(t, "\tErr: %v\tLength %d\n", err, length) } - - expect(t, entry.ok == ok, "find result didnt match") - expect(t, result == entry.result, "entry result didnt match") } } @@ -202,12 +208,17 @@ test_captures :: proc(t: ^testing.T) { // match all captures compare_captures :: proc(t: ^testing.T, test: ^Temp, haystack: string, comp: []string, loc := #caller_location) { length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures) - expect(t, len(comp) == length, "didnt match input comparison strings", loc) + if failed(t, len(comp) == length) { + logf(t, "Captures Compare Failed -> Lengths %d != %d\n", len(comp), length) + } for i in 0.. %s != %s\n", comp[i], text) + } } } @@ -215,7 +226,12 @@ test_captures :: proc(t: ^testing.T) { matches :: proc(t: ^testing.T, test: ^Temp, haystack: string, ok: bool, loc := #caller_location) { length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures) result := length > 0 && err == .OK - expect(t, result == ok, "result didnt eq", loc) + + if failed(t, result == ok) { + logf(t, "Capture match failed!\n") + logf(t, "\tErr: %v\n", err) + logf(t, "\tLength: %v\n", length) + } } temp := Temp { pattern = "(one).+" } @@ -238,8 +254,8 @@ test_captures :: proc(t: ^testing.T) { lua.find_aux(haystack, pattern, 0, false, &captures) cap1 := captures[1] cap2 := captures[2] - text1 := haystack[cap1.start:cap1.end] - text2 := haystack[cap2.start:cap2.end] + text1 := haystack[cap1.byte_start:cap1.byte_end] + text2 := haystack[cap2.byte_start:cap2.byte_end] expect(t, text1 == "233", "Multi-Capture failed at 1") expect(t, text2 == "hello", "Multi-Capture failed at 2") } @@ -247,6 +263,13 @@ test_captures :: proc(t: ^testing.T) { @test test_gmatch :: proc(t: ^testing.T) { + gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) { + if failed(t, a[index] == b) { + logf(t, "GMATCH %d failed!\n", index) + logf(t, "\t%s != %s\n", a[index], b) + } + } + { haystack := "testing this out 123" pattern := "%w+" @@ -256,7 +279,7 @@ test_gmatch :: proc(t: ^testing.T) { index: int for match in lua.gmatch(s, pattern, &captures) { - expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match)) + gmatch_check(t, index, output[:], match) index += 1 } } @@ -270,7 +293,7 @@ test_gmatch :: proc(t: ^testing.T) { index: int for match in lua.gmatch(s, pattern, &captures) { - expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match)) + gmatch_check(t, index, output[:], match) index += 1 } } @@ -284,7 +307,7 @@ test_gmatch :: proc(t: ^testing.T) { index: int for match in lua.gmatch(s, pattern, &captures) { - expect(t, output[index] == match, fmt.tprintf("GMATCH %d failed: %s != %s\n", index, output[index], match)) + gmatch_check(t, index, output[:], match) index += 1 } } @@ -308,11 +331,15 @@ test_gfind :: proc(t: ^testing.T) { index: int for word in lua.gfind(s, pattern, &captures) { - expect(t, output[index] == word, fmt.tprintf("GFIND %d failed: %s != %s\n", index, output[index], word)) + if failed(t, output[index] == word) { + logf(t, "GFIND %d failed!\n", index) + logf(t, "\t%s != %s\n", output[index], word) + } index += 1 } } +@test test_frontier :: proc(t: ^testing.T) { Temp :: struct { t: ^testing.T, @@ -322,11 +349,12 @@ test_frontier :: proc(t: ^testing.T) { call :: proc(data: rawptr, word: string) { temp := cast(^Temp) data - expect( - temp.t, - word == temp.output[temp.index], - fmt.tprintf("frontier temp didnt match: %s != %s\n", word, temp.output[temp.index]), - ) + + if failed(temp.t, word == temp.output[temp.index]) { + logf(temp.t, "GSUB_WITH %d failed!\n", temp.index) + logf(temp.t, "\t%s != %s\n", temp.output[temp.index], word) + } + temp.index += 1 } @@ -343,8 +371,38 @@ test_frontier :: proc(t: ^testing.T) { lua.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call) } +@test +test_utf8 :: proc(t: ^testing.T) { + // { + // haystack := "恥ずべき恥フク恥ロ" + // s := &haystack + // captures: [lua.MAXCAPTURES]lua.Match + + // for word in lua.gmatch(s, "恥", &captures) { + // fmt.eprintln(word) + // } + // } + + { + haystack := "恥ずべき恥フク恥ロ" + s := &haystack + captures: [lua.MAXCAPTURES]lua.Match + + for word in lua.gmatch(s, "w+", &captures) { + fmt.eprintln(word) + } + } + + // captures: [MAXCAPTURES]Match + // length, err := lua.find_aux("damn, pattern,) +} + main :: proc() { t: testing.T + stream := os.stream_from_handle(os.stdout) + w := io.to_writer(stream) + t.w = w + test_find(&t) test_match(&t) test_captures(&t) @@ -353,7 +411,9 @@ main :: proc() { test_gfind(&t) test_frontier(&t) - fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) + // test_utf8(&t) + + fmt.wprintf(w, "%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) if TEST_fail > 0 { os.exit(1) } From eb5523d5d3326fce3c0d00ca071051875715447f Mon Sep 17 00:00:00 2001 From: skytrias Date: Thu, 1 Dec 2022 05:18:24 +0100 Subject: [PATCH 04/18] case insensitive helper call --- core/text/lua/strlib.odin | 56 +++++++++++++++++++-- tests/core/text/lua/test_core_text_lua.odin | 52 ++++++++++--------- 2 files changed, 81 insertions(+), 27 deletions(-) diff --git a/core/text/lua/strlib.odin b/core/text/lua/strlib.odin index 2d4543f75..ca95367e9 100644 --- a/core/text/lua/strlib.odin +++ b/core/text/lua/strlib.odin @@ -788,12 +788,23 @@ gsub_allocator :: proc( return gsub_builder(&builder, haystack, pattern, replace) } +Gsub_Proc :: proc( + // optional passed data + data: rawptr, + // word match found + word: string, + // current haystack for found captures + haystack: string, + // found captures - empty for no captures + captures: []Match, +) + // call a procedure on every match in the haystack gsub_with :: proc( haystack: string, pattern: string, data: rawptr, - call: proc(data: rawptr, word: string), + call: Gsub_Proc, ) { // find matches captures: [MAXCAPTURES]Match @@ -810,7 +821,7 @@ gsub_with :: proc( cap := captures[0] word := haystack[cap.byte_start:cap.byte_end] - call(data, word) + call(data, word, haystack, captures[1:length]) // advance string till end haystack = haystack[cap.byte_end:] @@ -837,4 +848,43 @@ gfind :: proc( } return -} \ No newline at end of file +} + +// rebuilds a pattern into a case insensitive pattern +pattern_case_insensitive_builder :: proc( + builder: ^strings.Builder, + pattern: string, +) -> (res: string) { + p := pattern + last_percent: bool + + for len(p) > 0 { + char, size := utf8.decode_rune_in_string(p) + + if unicode.is_alpha(char) && !last_percent { + // write character class in manually + strings.write_byte(builder, '[') + strings.write_rune(builder, unicode.to_lower(char)) + strings.write_rune(builder, unicode.to_upper(char)) + strings.write_byte(builder, ']') + } else { + strings.write_rune(builder, char) + } + + last_percent = char == L_ESC + p = p[size:] + } + + return strings.to_string(builder^) +} + +pattern_case_insensitive_allocator :: proc( + pattern: string, + cap: int = 256, + allocator := context.allocator, +) -> (res: string) { + builder := strings.builder_make(0, cap, context.temp_allocator) + return pattern_case_insensitive_builder(&builder, pattern) +} + +pattern_case_insensitive :: proc { pattern_case_insensitive_builder, pattern_case_insensitive_allocator } \ No newline at end of file diff --git a/tests/core/text/lua/test_core_text_lua.odin b/tests/core/text/lua/test_core_text_lua.odin index 630631fc2..ed7d6c58f 100644 --- a/tests/core/text/lua/test_core_text_lua.odin +++ b/tests/core/text/lua/test_core_text_lua.odin @@ -261,14 +261,15 @@ test_captures :: proc(t: ^testing.T) { } } +gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) { + if failed(t, a[index] == b) { + logf(t, "GMATCH %d failed!\n", index) + logf(t, "\t%s != %s\n", a[index], b) + } +} + @test test_gmatch :: proc(t: ^testing.T) { - gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) { - if failed(t, a[index] == b) { - logf(t, "GMATCH %d failed!\n", index) - logf(t, "\t%s != %s\n", a[index], b) - } - } { haystack := "testing this out 123" @@ -347,7 +348,7 @@ test_frontier :: proc(t: ^testing.T) { output: [3]string, } - call :: proc(data: rawptr, word: string) { + call :: proc(data: rawptr, word: string, haystack: string, captures: []lua.Match) { temp := cast(^Temp) data if failed(temp.t, word == temp.output[temp.index]) { @@ -373,28 +374,31 @@ test_frontier :: proc(t: ^testing.T) { @test test_utf8 :: proc(t: ^testing.T) { - // { - // haystack := "恥ずべき恥フク恥ロ" - // s := &haystack - // captures: [lua.MAXCAPTURES]lua.Match - - // for word in lua.gmatch(s, "恥", &captures) { - // fmt.eprintln(word) - // } - // } - { - haystack := "恥ずべき恥フク恥ロ" + haystack := "恥ず べき恥 フク恥ロ" s := &haystack captures: [lua.MAXCAPTURES]lua.Match + output := [?]string { "恥ず", "べき恥", "フク恥ロ" } + index: int - for word in lua.gmatch(s, "w+", &captures) { - fmt.eprintln(word) + for word in lua.gmatch(s, "%w+", &captures) { + gmatch_check(t, index, output[:], word) + index += 1 } } +} - // captures: [MAXCAPTURES]Match - // length, err := lua.find_aux("damn, pattern,) +@test +test_case_insensitive :: proc(t: ^testing.T) { + { + pattern := lua.pattern_case_insensitive("test", 256, context.temp_allocator) + goal := "[tT][eE][sS][tT]" + + if failed(t, pattern == goal) { + logf(t, "Case Insensitive Pattern doesn't match result\n") + logf(t, "\t%s != %s\n", pattern, goal) + } + } } main :: proc() { @@ -410,8 +414,8 @@ main :: proc() { test_gsub(&t) test_gfind(&t) test_frontier(&t) - - // test_utf8(&t) + test_utf8(&t) + test_case_insensitive(&t) fmt.wprintf(w, "%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) if TEST_fail > 0 { From 0ae1812f90eac575b07fc3cfdfdcaf9c48119cd6 Mon Sep 17 00:00:00 2001 From: skytrias Date: Thu, 1 Dec 2022 19:39:07 +0100 Subject: [PATCH 05/18] small fixes and oob checks, stop infinite loops on empty matches --- core/text/lua/strlib.odin | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/core/text/lua/strlib.odin b/core/text/lua/strlib.odin index ca95367e9..05495afe4 100644 --- a/core/text/lua/strlib.odin +++ b/core/text/lua/strlib.odin @@ -23,6 +23,7 @@ Error :: enum { Unfinished_Capture, Malformed_Pattern, Rune_Error, + Match_Invalid, } L_ESC :: '%' @@ -183,17 +184,21 @@ classend :: proc(ms: ^MatchState, p: int) -> (step: int, err: Error) { case '[': { // fine with step by 1 - if ms.pattern[step] == '^' { + if step + 1 < len(ms.pattern) && ms.pattern[step] == '^' { step += 1 } // run till end is reached - for ms.pattern[step] != ']' { + for { if step == len(ms.pattern) { err = .Malformed_Pattern return } + if ms.pattern[step] == ']' { + break + } + // dont care about utf8 here step += 1 @@ -417,7 +422,7 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { char, _ := utf8_peek(ms.pattern[p:]) or_return switch char { case '(': { - if ms.pattern[p + 1] == ')' { + if p + 1 < len(ms.pattern) && ms.pattern[p + 1] == ')' { s = start_capture(ms, s, p + 2, CAP_POSITION) or_return } else { s = start_capture(ms, s, p + 1, CAP_UNFINISHED) or_return @@ -582,7 +587,7 @@ push_onecapture :: proc( } case CAP_POSITION: { - matches[i] = { init - 1, init - 1 } + matches[i] = { init, init + 1 } } case: { @@ -697,8 +702,14 @@ find_aux :: proc( res := match(&ms, s, p) or_return if res != INVALID { + // disallow non advancing match + if s == res { + err = .Match_Invalid + } + // NOTE(Skytrias): first result is reserved for a full match matches[0] = { s, res } + // rest are the actual captures captures = push_captures(&ms, -1, -1, matches[1:]) or_return captures += 1 From 967afd8bbb3be3bed7caab2fd7bab09d8dfaf0cd Mon Sep 17 00:00:00 2001 From: skytrias Date: Thu, 1 Dec 2022 21:54:30 +0100 Subject: [PATCH 06/18] try helper procedures / structs --- core/text/lua/strlib.odin | 134 +++++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) diff --git a/core/text/lua/strlib.odin b/core/text/lua/strlib.odin index 05495afe4..52b669f6e 100644 --- a/core/text/lua/strlib.odin +++ b/core/text/lua/strlib.odin @@ -1,5 +1,6 @@ package strlib +import "core:runtime" import "core:unicode" import "core:unicode/utf8" import "core:strings" @@ -898,4 +899,135 @@ pattern_case_insensitive_allocator :: proc( return pattern_case_insensitive_builder(&builder, pattern) } -pattern_case_insensitive :: proc { pattern_case_insensitive_builder, pattern_case_insensitive_allocator } \ No newline at end of file +pattern_case_insensitive :: proc { pattern_case_insensitive_builder, pattern_case_insensitive_allocator } + +find_test :: proc( + haystack: string, + pattern: string, + offset: int = 0, + captures: ..^Match, +) -> (start, end: int, ok: bool) #no_bounds_check { + matches: [MAXCAPTURES]Match + length, err := find_aux(haystack, pattern, offset, true, &matches) + + ok = length > 0 && err == .OK + match := matches[0] + start = match.byte_start + end = match.byte_end + + for arg, i in captures { + arg^ = matches[i + 1] + } + + return +} + +match_test :: proc( + haystack: string, + pattern: string, + offset: int = 0, + captures: ..^Match, +) -> (word: string, ok: bool) #no_bounds_check { + matches: [MAXCAPTURES]Match + length, err := find_aux(haystack, pattern, offset, true, &matches) + + ok = length > 0 && err == .OK + match := matches[0] + word = haystack[match.byte_start:match.byte_end] + + for arg, i in captures { + arg^ = matches[i + 1] + } + + return +} + +Matcher :: struct { + haystack: string, + pattern: string, + captures: [MAXCAPTURES]Match, + captures_length: int, + offset: int, + err: Error, + + // changing content for iterators + iter: string, +} + +// matcher +matcher_init :: proc(haystack, pattern: string, offset: int = 0) -> (res: Matcher) { + res.haystack = haystack + res.pattern = pattern + res.offset = offset + res.iter = haystack + return +} + +matcher_find :: proc(matcher: ^Matcher) -> (start, end: int, ok: bool) #no_bounds_check { + matcher.captures_length, matcher.err = find_aux( + matcher.haystack, + matcher.pattern, + matcher.offset, + true, + &matcher.captures, + ) + ok = matcher.captures_length > 0 && matcher.err == .OK + match := matcher.captures[0] + start = match.byte_start + end = match.byte_end + return +} + +matcher_match :: proc(matcher: ^Matcher) -> (word: string, ok: bool) #no_bounds_check { + matcher.captures_length, matcher.err = find_aux( + matcher.haystack, + matcher.pattern, + matcher.offset, + false, + &matcher.captures, + ) + ok = matcher.captures_length > 0 && matcher.err == .OK + match := matcher.captures[0] + word = matcher.haystack[match.byte_start:match.byte_end] + return +} + +// get the capture at the correct spot +matcher_capture :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> string #no_bounds_check { + runtime.bounds_check_error_loc(loc, index + 1, MAXCAPTURES - 1) + cap := matcher.captures[index + 1] + return matcher.haystack[cap.byte_start:cap.byte_end] +} + +matcher_capture_raw :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> Match #no_bounds_check { + runtime.bounds_check_error_loc(loc, index + 1, MAXCAPTURES - 1) + return matcher.captures[index + 1] +} + +matcher_gmatch :: matcher_match_iter + +matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, ok: bool) { + if len(matcher.iter) > 0 { + matcher.captures_length, matcher.err = find_aux( + matcher.iter, + matcher.pattern, + matcher.offset, + false, + &matcher.captures, + ) + + if matcher.captures_length != 0 && matcher.err == .OK { + ok = true + first := matcher.captures_length > 1 ? 1 : 0 + match := matcher.captures[first] + res = matcher.iter[match.byte_start:match.byte_end] + matcher.iter = matcher.iter[match.byte_end:] + } + } + + return +} + +matcher_captures_slice :: proc(matcher: ^Matcher) -> []Match { + return matcher.captures[1:matcher.captures_length] +} From ff7f139fd7bd8443902c98065ad90813f0039a6a Mon Sep 17 00:00:00 2001 From: skytrias Date: Tue, 20 Dec 2022 12:59:32 +0100 Subject: [PATCH 07/18] add iter_index and update tests to use easier matcher setup --- core/text/lua/strlib.odin | 9 ++- tests/core/text/lua/test_core_text_lua.odin | 72 +++++++-------------- 2 files changed, 30 insertions(+), 51 deletions(-) diff --git a/core/text/lua/strlib.odin b/core/text/lua/strlib.odin index 52b669f6e..703da671b 100644 --- a/core/text/lua/strlib.odin +++ b/core/text/lua/strlib.odin @@ -952,6 +952,7 @@ Matcher :: struct { // changing content for iterators iter: string, + iter_index: int, } // matcher @@ -1006,7 +1007,7 @@ matcher_capture_raw :: proc(matcher: ^Matcher, index: int, loc := #caller_locati matcher_gmatch :: matcher_match_iter -matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, ok: bool) { +matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: bool) { if len(matcher.iter) > 0 { matcher.captures_length, matcher.err = find_aux( matcher.iter, @@ -1020,7 +1021,13 @@ matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, ok: bool) { ok = true first := matcher.captures_length > 1 ? 1 : 0 match := matcher.captures[first] + + // output res = matcher.iter[match.byte_start:match.byte_end] + index = matcher.iter_index + + // advance + matcher.iter_index += 1 matcher.iter = matcher.iter[match.byte_end:] } } diff --git a/tests/core/text/lua/test_core_text_lua.odin b/tests/core/text/lua/test_core_text_lua.odin index ed7d6c58f..b8e561765 100644 --- a/tests/core/text/lua/test_core_text_lua.odin +++ b/tests/core/text/lua/test_core_text_lua.odin @@ -61,21 +61,18 @@ test_find :: proc(t: ^testing.T) { { "helelo", "h.-l", 0, { 0, 3, true } }, } - captures: [lua.MAXCAPTURES]lua.Match for entry, i in ENTRIES { - captures[0] = {} - length, err := lua.find_aux(entry.s, entry.p, entry.offset, true, &captures) - cap := captures[0] - ok := length > 0 && err == .OK - success := entry.match.ok == ok && entry.match.start == cap.byte_start && entry.match.end == cap.byte_end + matcher := lua.matcher_init(entry.s, entry.p, entry.offset) + start, end, ok := lua.matcher_find(&matcher) + success := entry.match.ok == ok && start == entry.match.start && end == entry.match.end if failed(t, success) { logf(t, "Find %d failed!\n", i) logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p) - logf(t, "\tSTART: %d == %d?\n", entry.match.start, cap.byte_start) - logf(t, "\tEND: %d == %d?\n", entry.match.end, cap.byte_end) - logf(t, "\tErr: %v\tLength %d\n", err, length) - } + logf(t, "\tSTART: %d == %d?\n", entry.match.start, start) + logf(t, "\tEND: %d == %d?\n", entry.match.end, end) + logf(t, "\tErr: %v\tLength %d\n", matcher.err, matcher.captures_length) + } } } @@ -181,19 +178,16 @@ test_match :: proc(t: ^testing.T) { { "testing _this_ out", "%b_", "", false }, } - captures: [lua.MAXCAPTURES]lua.Match for entry, i in ENTRIES { - captures[0] = {} - length, err := lua.find_aux(entry.s, entry.p, 0, false, &captures) - ok := length > 0 && err == .OK - result := entry.s[captures[0].byte_start:captures[0].byte_end] + matcher := lua.matcher_init(entry.s, entry.p) + result, ok := lua.matcher_match(&matcher) success := entry.ok == ok && result == entry.result if failed(t, success) { logf(t, "Match %d failed!\n", i) logf(t, "\tHAYSTACK %s\tPATTERN %s\n", entry.s, entry.p) logf(t, "\tResults: WANTED %s\tGOT %s\n", entry.result, result) - logf(t, "\tErr: %v\tLength %d\n", err, length) + logf(t, "\tErr: %v\tLength %d\n", matcher.err, matcher.captures_length) } } } @@ -270,46 +264,30 @@ gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) { @test test_gmatch :: proc(t: ^testing.T) { - { - haystack := "testing this out 123" - pattern := "%w+" - s := &haystack - captures: [lua.MAXCAPTURES]lua.Match + matcher := lua.matcher_init("testing this out 123", "%w+") output := [?]string { "testing", "this", "out", "123" } - index: int - - for match in lua.gmatch(s, pattern, &captures) { + + for match, index in lua.matcher_gmatch(&matcher) { gmatch_check(t, index, output[:], match) - index += 1 } } { - haystack := "#afdde6" - pattern := "%x%x" - s := &haystack - captures: [lua.MAXCAPTURES]lua.Match + matcher := lua.matcher_init("#afdde6", "%x%x") output := [?]string { "af", "dd", "e6" } - index: int - - for match in lua.gmatch(s, pattern, &captures) { + + for match, index in lua.matcher_gmatch(&matcher) { gmatch_check(t, index, output[:], match) - index += 1 } } { - haystack := "testing outz captures yo outz outtz" - pattern := "(out)z" - s := &haystack - captures: [lua.MAXCAPTURES]lua.Match + matcher := lua.matcher_init("testing outz captures yo outz outtz", "(out)z") output := [?]string { "out", "out" } - index: int - for match in lua.gmatch(s, pattern, &captures) { + for match, index in lua.matcher_gmatch(&matcher) { gmatch_check(t, index, output[:], match) - index += 1 } } } @@ -374,17 +352,11 @@ test_frontier :: proc(t: ^testing.T) { @test test_utf8 :: proc(t: ^testing.T) { - { - haystack := "恥ず べき恥 フク恥ロ" - s := &haystack - captures: [lua.MAXCAPTURES]lua.Match - output := [?]string { "恥ず", "べき恥", "フク恥ロ" } - index: int + matcher := lua.matcher_init("恥ず べき恥 フク恥ロ", "%w+") + output := [?]string { "恥ず", "べき恥", "フク恥ロ" } - for word in lua.gmatch(s, "%w+", &captures) { - gmatch_check(t, index, output[:], word) - index += 1 - } + for match, index in lua.matcher_gmatch(&matcher) { + gmatch_check(t, index, output[:], match) } } From 1bea0f37720db3af0eeb19b72f3ae9aa05c0b0fa Mon Sep 17 00:00:00 2001 From: skytrias Date: Tue, 20 Dec 2022 15:48:10 +0100 Subject: [PATCH 08/18] fix styling issues and use switches in cases its necessary, add comments to helpers --- core/text/lua/strlib.odin | 493 ++++++++------------ tests/core/text/lua/test_core_text_lua.odin | 6 +- 2 files changed, 204 insertions(+), 295 deletions(-) diff --git a/core/text/lua/strlib.odin b/core/text/lua/strlib.odin index 703da671b..12bb45aae 100644 --- a/core/text/lua/strlib.odin +++ b/core/text/lua/strlib.odin @@ -5,7 +5,7 @@ import "core:unicode" import "core:unicode/utf8" import "core:strings" -MAXCAPTURES :: 32 +MAX_CAPTURES :: 32 Capture :: struct { init: int, @@ -32,71 +32,52 @@ CAP_POSITION :: -2 CAP_UNFINISHED :: -1 INVALID :: -1 -MatchState :: struct { +Match_State :: struct { src: string, pattern: string, level: int, - capture: [MAXCAPTURES]Capture, + capture: [MAX_CAPTURES]Capture, } match_class :: proc(c: rune, cl: rune) -> (res: bool) { switch unicode.to_lower(cl) { - case 'a': res = isalpha(c) - case 'c': res = iscntrl(c) - case 'd': res = isdigit(c) - case 'g': res = isgraph(c) - case 'l': res = islower(c) - case 'p': res = ispunct(c) - case 's': res = isspace(c) - case 'u': res = isupper(c) - case 'w': res = isalnum(c) - case 'x': res = isxdigit(c) - case: return cl == c + case 'a': res = is_alpha(c) + case 'c': res = is_cntrl(c) + case 'd': res = is_digit(c) + case 'g': res = is_graph(c) + case 'l': res = is_lower(c) + case 'p': res = is_punct(c) + case 's': res = is_space(c) + case 'u': res = is_upper(c) + case 'w': res = is_alnum(c) + case 'x': res = is_xdigit(c) + case: return cl == c } - return islower(cl) ? res : !res + return is_lower(cl) ? res : !res } -isalpha :: proc(c: rune) -> bool { - return unicode.is_alpha(c) -} +is_alpha :: unicode.is_alpha +is_digit :: unicode.is_digit +is_lower :: unicode.is_lower +is_upper :: unicode.is_upper +is_punct :: unicode.is_punct +is_space :: unicode.is_space +is_cntrl :: unicode.is_control -isdigit :: proc(c: rune) -> bool { - return unicode.is_digit(c) -} - -isalnum :: proc(c: rune) -> bool { +is_alnum :: proc(c: rune) -> bool { return unicode.is_alpha(c) || unicode.is_digit(c) } -iscntrl :: proc(c: rune) -> bool { - return unicode.is_control(c) +is_graph :: proc(c: rune) -> bool { + return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || unicode.is_digit(c) } -islower :: proc(c: rune) -> bool { - return unicode.is_lower(c) -} - -isupper :: proc(c: rune) -> bool { - return unicode.is_upper(c) -} - -isgraph :: proc(c: rune) -> bool { - return unicode.is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') -} - -ispunct :: proc(c: rune) -> bool { - return unicode.is_punct(c) -} - -isxdigit :: proc(c: rune) -> bool { - return unicode.is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') -} - -isspace :: proc(c: rune) -> bool { - return unicode.is_space(c) +is_xdigit :: proc(c: rune) -> bool { + return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') || unicode.is_digit(c) } +// find the first utf8 charater and its size, return an error if the character is an error utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) { c, size = utf8.decode_rune_in_string(bytes) @@ -107,6 +88,8 @@ utf8_peek :: proc(bytes: string) -> (c: rune, size: int, err: Error) { return } +// find the first utf8 charater and its size and advance the index +// return an error if the character is an error utf8_advance :: proc(bytes: string, index: ^int) -> (c: rune, err: Error) { size: int c, size = utf8.decode_rune_in_string(bytes[index^:]) @@ -145,7 +128,7 @@ utf8_next :: proc(bytes: string, a: int) -> int { return a < b ? a + 1 : b } -check_capture :: proc(ms: ^MatchState, l: rune) -> (int, Error) { +check_capture :: proc(ms: ^Match_State, l: rune) -> (int, Error) { l := int(l - '1') if l < 0 || l >= ms.level || ms.capture[l].len == CAP_UNFINISHED { @@ -155,7 +138,7 @@ check_capture :: proc(ms: ^MatchState, l: rune) -> (int, Error) { return l, .OK } -capture_to_close :: proc(ms: ^MatchState) -> (int, Error) { +capture_to_close :: proc(ms: ^Match_State) -> (int, Error) { level := ms.level - 1 for level >= 0 { @@ -169,55 +152,53 @@ capture_to_close :: proc(ms: ^MatchState) -> (int, Error) { return 0, .Invalid_Pattern_Capture } -classend :: proc(ms: ^MatchState, p: int) -> (step: int, err: Error) { +class_end :: proc(ms: ^Match_State, p: int) -> (step: int, err: Error) { step = p ch := utf8_advance(ms.pattern, &step) or_return switch ch { - case L_ESC: { + case L_ESC: + if step == len(ms.pattern) { + err = .Malformed_Pattern + return + } + + utf8_advance(ms.pattern, &step) or_return + + case '[': + // fine with step by 1 + if step + 1 < len(ms.pattern) && ms.pattern[step] == '^' { + step += 1 + } + + // run till end is reached + for { if step == len(ms.pattern) { err = .Malformed_Pattern return } - utf8_advance(ms.pattern, &step) or_return - } - - case '[': { - // fine with step by 1 - if step + 1 < len(ms.pattern) && ms.pattern[step] == '^' { - step += 1 + if ms.pattern[step] == ']' { + break } - // run till end is reached - for { - if step == len(ms.pattern) { - err = .Malformed_Pattern - return - } - - if ms.pattern[step] == ']' { - break - } - - // dont care about utf8 here - step += 1 - - if step < len(ms.pattern) && ms.pattern[step] == L_ESC { - // skip escapes like '%' - step += 1 - } - } - - // advance last time + // dont care about utf8 here step += 1 + + if step < len(ms.pattern) && ms.pattern[step] == L_ESC { + // skip escapes like '%' + step += 1 + } } + + // advance last time + step += 1 } return } -matchbracketclass :: proc(ms: ^MatchState, c: rune, p, ec: int) -> (sig: bool, err: Error) { +match_bracket_class :: proc(ms: ^Match_State, c: rune, p, ec: int) -> (sig: bool, err: Error) { sig = true p := p @@ -259,7 +240,7 @@ matchbracketclass :: proc(ms: ^MatchState, c: rune, p, ec: int) -> (sig: bool, e return } -singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> (matched: bool, schar_size: int, err: Error) { +single_match :: proc(ms: ^Match_State, s, p, ep: int) -> (matched: bool, schar_size: int, err: Error) { if s >= len(ms.src) { return } @@ -269,23 +250,18 @@ singlematch :: proc(ms: ^MatchState, s, p, ep: int) -> (matched: bool, schar_siz schar_size = ssize switch pchar { - case '.': matched = true - case L_ESC: { - pchar_next, _ := utf8_peek(ms.pattern[p + psize:]) or_return - matched = match_class(schar, pchar_next) - } - case '[': { - matched = matchbracketclass(ms, schar, p, ep - 1) or_return - } - case: { - matched = schar == pchar - } + case '.': matched = true + case L_ESC: + pchar_next, _ := utf8_peek(ms.pattern[p + psize:]) or_return + matched = match_class(schar, pchar_next) + case '[': matched = match_bracket_class(ms, schar, p, ep - 1) or_return + case: matched = schar == pchar } return } -matchbalance :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { +match_balance :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) { if p >= len(ms.pattern) - 1 { return INVALID, .Invalid_Pattern_Capture } @@ -307,13 +283,15 @@ matchbalance :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { for s < len(ms.src) { ch := utf8_advance(ms.src, &s) or_return - if ch == end { + switch ch{ + case end: cont -= 1 if cont == 0 { return s, .OK } - } else if ch == begin { + + case begin: cont += 1 } } @@ -321,12 +299,12 @@ matchbalance :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { return INVALID, .OK } -max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { +max_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) { m := s // count up matches for { - matched, size := singlematch(ms, m, p, ep) or_return + matched, size := single_match(ms, m, p, ep) or_return if !matched { break @@ -352,7 +330,7 @@ max_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { return INVALID, .OK } -min_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { +min_expand :: proc(ms: ^Match_State, s, p, ep: int) -> (res: int, err: Error) { s := s for { @@ -362,7 +340,7 @@ min_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { return result, .OK } else { // TODO receive next step maybe? - matched, rune_size := singlematch(ms, s, p, ep) or_return + matched, rune_size := single_match(ms, s, p, ep) or_return if matched { s += rune_size @@ -373,7 +351,7 @@ min_expand :: proc(ms: ^MatchState, s, p, ep: int) -> (res: int, err: Error) { } } -start_capture :: proc(ms: ^MatchState, s, p, what: int) -> (res: int, err: Error) { +start_capture :: proc(ms: ^Match_State, s, p, what: int) -> (res: int, err: Error) { level := ms.level ms.capture[level].init = s @@ -387,7 +365,7 @@ start_capture :: proc(ms: ^MatchState, s, p, what: int) -> (res: int, err: Error return } -end_capture :: proc(ms: ^MatchState, s, p: int) -> (res: int, err: Error) { +end_capture :: proc(ms: ^Match_State, s, p: int) -> (res: int, err: Error) { l := capture_to_close(ms) or_return // TODO double check, could do string as int index @@ -400,7 +378,7 @@ end_capture :: proc(ms: ^MatchState, s, p: int) -> (res: int, err: Error) { return } -match_capture :: proc(ms: ^MatchState, s: int, char: rune) -> (res: int, err: Error) { +match_capture :: proc(ms: ^Match_State, s: int, char: rune) -> (res: int, err: Error) { index := check_capture(ms, char) or_return length := ms.capture[index].len @@ -411,7 +389,7 @@ match_capture :: proc(ms: ^MatchState, s: int, char: rune) -> (res: int, err: Er return INVALID, .OK } -match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { +match :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) { s := s p := p @@ -422,156 +400,127 @@ match :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { // NOTE we can walk by ascii steps if we know the characters are ascii char, _ := utf8_peek(ms.pattern[p:]) or_return switch char { - case '(': { - if p + 1 < len(ms.pattern) && ms.pattern[p + 1] == ')' { - s = start_capture(ms, s, p + 2, CAP_POSITION) or_return - } else { - s = start_capture(ms, s, p + 1, CAP_UNFINISHED) or_return - } + case '(': + if p + 1 < len(ms.pattern) && ms.pattern[p + 1] == ')' { + s = start_capture(ms, s, p + 2, CAP_POSITION) or_return + } else { + s = start_capture(ms, s, p + 1, CAP_UNFINISHED) or_return } - case ')': { - s = end_capture(ms, s, p + 1) or_return - } + case ')': + s = end_capture(ms, s, p + 1) or_return - case '$': { - if p + 1 != len(ms.pattern) { - return match_default(ms, s, p) - } - - if len(ms.src) != s { - s = INVALID - } - } - - case L_ESC: { - // stop short patterns like "%" only - if p + 1 >= len(ms.pattern) { - err = .OOB - return - } - - switch ms.pattern[p + 1] { - // balanced string - case 'b': { - s = matchbalance(ms, s, p + 2) or_return - - if s != INVALID { - // eg after %b() - return match(ms, s, p + 4) - } - } - - // frontier - case 'f': { - p += 2 - - if ms.pattern[p] != '[' { - return INVALID, .Invalid_Pattern_Capture - } - - ep := classend(ms, p) or_return - previous, current: rune - - // get previous - if s != 0 { - temp := utf8_prev(ms.src, 0, s) - previous, _ = utf8_peek(ms.src[temp:]) or_return - } - - // get current - if s != len(ms.src) { - current, _ = utf8_peek(ms.src[s:]) or_return - } - - m1 := matchbracketclass(ms, previous, p, ep - 1) or_return - m2 := matchbracketclass(ms, current, p, ep - 1) or_return - - if !m1 && m2 { - return match(ms, s, ep) - } - - s = INVALID - } - - // capture group - case '0'..<'9': { - s = match_capture(ms, s, rune(ms.pattern[p + 1])) or_return - - if s != INVALID { - return match(ms, s, p + 2) - } - } - - case: { - return match_default(ms, s, p) - } - } - } - - case: { + case '$': + if p + 1 != len(ms.pattern) { return match_default(ms, s, p) + } + + if len(ms.src) != s { + s = INVALID } + + case L_ESC: + // stop short patterns like "%" only + if p + 1 >= len(ms.pattern) { + err = .OOB + return + } + + switch ms.pattern[p + 1] { + // balanced string + case 'b': + s = match_balance(ms, s, p + 2) or_return + + if s != INVALID { + // eg after %b() + return match(ms, s, p + 4) + } + + // frontier + case 'f': + p += 2 + + if ms.pattern[p] != '[' { + return INVALID, .Invalid_Pattern_Capture + } + + ep := class_end(ms, p) or_return + previous, current: rune + + // get previous + if s != 0 { + temp := utf8_prev(ms.src, 0, s) + previous, _ = utf8_peek(ms.src[temp:]) or_return + } + + // get current + if s != len(ms.src) { + current, _ = utf8_peek(ms.src[s:]) or_return + } + + m1 := match_bracket_class(ms, previous, p, ep - 1) or_return + m2 := match_bracket_class(ms, current, p, ep - 1) or_return + + if !m1 && m2 { + return match(ms, s, ep) + } + + s = INVALID + + // capture group + case '0'..<'9': + s = match_capture(ms, s, rune(ms.pattern[p + 1])) or_return + + if s != INVALID { + return match(ms, s, p + 2) + } + + case: return match_default(ms, s, p) + } + + case: + return match_default(ms, s, p) } return s, .OK } -match_default :: proc(ms: ^MatchState, s, p: int) -> (unused: int, err: Error) { +match_default :: proc(ms: ^Match_State, s, p: int) -> (unused: int, err: Error) { s := s - ep := classend(ms, p) or_return - single_matched, ssize := singlematch(ms, s, p, ep) or_return + ep := class_end(ms, p) or_return + single_matched, ssize := single_match(ms, s, p, ep) or_return if !single_matched { epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0 - if epc == '*' || epc == '?' || epc == '-' { - return match(ms, s, ep + 1) - } else { - s = INVALID + switch epc { + case '*', '?', '-': return match(ms, s, ep + 1) + case: s = INVALID } } else { epc := ep < len(ms.pattern) ? ms.pattern[ep] : 0 switch epc { - case '?': { - result := match(ms, s + ssize, ep + 1) or_return - - if result != INVALID { - s = result - } else { - return match(ms, s, ep + 1) - } + case '?': + result := match(ms, s + ssize, ep + 1) or_return + + if result != INVALID { + s = result + } else { + return match(ms, s, ep + 1) } - case '+': { - s = max_expand(ms, s + ssize, p, ep) or_return - } - - case '*': { - s = max_expand(ms, s, p, ep) or_return - } - - case '-': { - s = min_expand(ms, s, p, ep) or_return - } - - case: { - return match(ms, s + ssize, ep) - } + case '+': s = max_expand(ms, s + ssize, p, ep) or_return + case '*': s = max_expand(ms, s, p, ep) or_return + case '-': s = min_expand(ms, s, p, ep) or_return + case: return match(ms, s + ssize, ep) } } return s, .OK } -push_onecapture :: proc( - ms: ^MatchState, - i: int, - s: int, - e: int, - matches: []Match, -) -> (err: Error) { +push_onecapture :: proc(ms: ^Match_State, i: int, s: int, e: int, matches: []Match) -> (err: Error) { if i >= ms.level { if i == 0 { matches[0] = { 0, e - s } @@ -583,17 +532,9 @@ push_onecapture :: proc( length := ms.capture[i].len switch length { - case CAP_UNFINISHED: { - err = .Unfinished_Capture - } - - case CAP_POSITION: { - matches[i] = { init, init + 1 } - } - - case: { - matches[i] = { init, init + length } - } + case CAP_UNFINISHED: err = .Unfinished_Capture + case CAP_POSITION: matches[i] = { init, init + 1 } + case: matches[i] = { init, init + length } } } @@ -601,7 +542,7 @@ push_onecapture :: proc( } push_captures :: proc( - ms: ^MatchState, + ms: ^Match_State, s: int, e: int, matches: []Match, @@ -621,8 +562,8 @@ SPECIALS_TABLE := [256]u8 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // helper call to quick search for special characters index_special :: proc(text: string) -> int { + // TODO is this utf8 safe? for i in 0.. int { return -1 } -lmemfind :: proc(s1, s2: string) -> int { +lmem_find :: proc(s1, s2: string) -> int { l1 := len(s1) l2 := len(s2) @@ -671,14 +612,14 @@ find_aux :: proc( pattern: string, offset: int, allow_memfind: bool, - matches: ^[MAXCAPTURES]Match, + matches: ^[MAX_CAPTURES]Match, ) -> (captures: int, err: Error) { s := offset p := 0 specials_idx := index_special(pattern) if allow_memfind && specials_idx == -1 { - if index := lmemfind(haystack[s:], pattern); index != -1 { + if index := lmem_find(haystack[s:], pattern); index != -1 { matches[0] = { index + s, index + s + len(pattern) } captures = 1 return @@ -694,7 +635,7 @@ find_aux :: proc( pattern = pattern[1:] } - ms := MatchState { + ms := Match_State { src = haystack, pattern = pattern, } @@ -733,7 +674,7 @@ find_aux :: proc( gmatch :: proc( haystack: ^string, pattern: string, - captures: ^[MAXCAPTURES]Match, + captures: ^[MAX_CAPTURES]Match, ) -> (res: string, ok: bool) { if len(haystack) > 0 { length, err := find_aux(haystack^, pattern, 0, false, captures) @@ -758,7 +699,7 @@ gsub_builder :: proc( replace: string, ) -> string { // find matches - captures: [MAXCAPTURES]Match + captures: [MAX_CAPTURES]Match haystack := haystack for { @@ -819,7 +760,7 @@ gsub_with :: proc( call: Gsub_Proc, ) { // find matches - captures: [MAXCAPTURES]Match + captures: [MAX_CAPTURES]Match haystack := haystack for { @@ -846,7 +787,7 @@ gsub :: proc { gsub_builder, gsub_allocator } gfind :: proc( haystack: ^string, pattern: string, - captures: ^[MAXCAPTURES]Match, + captures: ^[MAX_CAPTURES]Match, ) -> (res: string, ok: bool) { if len(haystack) > 0 { length, err := find_aux(haystack^, pattern, 0, true, captures) @@ -901,51 +842,13 @@ pattern_case_insensitive_allocator :: proc( pattern_case_insensitive :: proc { pattern_case_insensitive_builder, pattern_case_insensitive_allocator } -find_test :: proc( - haystack: string, - pattern: string, - offset: int = 0, - captures: ..^Match, -) -> (start, end: int, ok: bool) #no_bounds_check { - matches: [MAXCAPTURES]Match - length, err := find_aux(haystack, pattern, offset, true, &matches) - - ok = length > 0 && err == .OK - match := matches[0] - start = match.byte_start - end = match.byte_end - - for arg, i in captures { - arg^ = matches[i + 1] - } - - return -} - -match_test :: proc( - haystack: string, - pattern: string, - offset: int = 0, - captures: ..^Match, -) -> (word: string, ok: bool) #no_bounds_check { - matches: [MAXCAPTURES]Match - length, err := find_aux(haystack, pattern, offset, true, &matches) - - ok = length > 0 && err == .OK - match := matches[0] - word = haystack[match.byte_start:match.byte_end] - - for arg, i in captures { - arg^ = matches[i + 1] - } - - return -} - +// Matcher helper struct that stores optional data you might want to use or not +// as lua is far more dynamic this helps dealing with too much data +// this also allows use of find/match/gmatch at through one struct Matcher :: struct { haystack: string, pattern: string, - captures: [MAXCAPTURES]Match, + captures: [MAX_CAPTURES]Match, captures_length: int, offset: int, err: Error, @@ -955,7 +858,7 @@ Matcher :: struct { iter_index: int, } -// matcher +// init using haystack & pattern and an optional byte offset matcher_init :: proc(haystack, pattern: string, offset: int = 0) -> (res: Matcher) { res.haystack = haystack res.pattern = pattern @@ -964,6 +867,7 @@ matcher_init :: proc(haystack, pattern: string, offset: int = 0) -> (res: Matche return } +// find the first match and return the byte start / end position in the string, true on success matcher_find :: proc(matcher: ^Matcher) -> (start, end: int, ok: bool) #no_bounds_check { matcher.captures_length, matcher.err = find_aux( matcher.haystack, @@ -979,6 +883,7 @@ matcher_find :: proc(matcher: ^Matcher) -> (start, end: int, ok: bool) #no_bound return } +// find the first match and return the matched word, true on success matcher_match :: proc(matcher: ^Matcher) -> (word: string, ok: bool) #no_bounds_check { matcher.captures_length, matcher.err = find_aux( matcher.haystack, @@ -993,20 +898,23 @@ matcher_match :: proc(matcher: ^Matcher) -> (word: string, ok: bool) #no_bounds_ return } -// get the capture at the correct spot +// get the capture at the "correct" spot, as spot 0 is reserved for the first match matcher_capture :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> string #no_bounds_check { - runtime.bounds_check_error_loc(loc, index + 1, MAXCAPTURES - 1) + runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES - 1) cap := matcher.captures[index + 1] return matcher.haystack[cap.byte_start:cap.byte_end] } +// get the raw match out of the captures, skipping spot 0 matcher_capture_raw :: proc(matcher: ^Matcher, index: int, loc := #caller_location) -> Match #no_bounds_check { - runtime.bounds_check_error_loc(loc, index + 1, MAXCAPTURES - 1) + runtime.bounds_check_error_loc(loc, index + 1, MAX_CAPTURES - 1) return matcher.captures[index + 1] } +// alias matcher_gmatch :: matcher_match_iter +// iteratively match the haystack till it cant find any matches matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: bool) { if len(matcher.iter) > 0 { matcher.captures_length, matcher.err = find_aux( @@ -1035,6 +943,7 @@ matcher_match_iter :: proc(matcher: ^Matcher) -> (res: string, index: int, ok: b return } +// get a slice of all valid captures above the first match matcher_captures_slice :: proc(matcher: ^Matcher) -> []Match { return matcher.captures[1:matcher.captures_length] } diff --git a/tests/core/text/lua/test_core_text_lua.odin b/tests/core/text/lua/test_core_text_lua.odin index b8e561765..f6b6e78a8 100644 --- a/tests/core/text/lua/test_core_text_lua.odin +++ b/tests/core/text/lua/test_core_text_lua.odin @@ -196,7 +196,7 @@ test_match :: proc(t: ^testing.T) { test_captures :: proc(t: ^testing.T) { Temp :: struct { pattern: string, - captures: [lua.MAXCAPTURES]lua.Match, + captures: [lua.MAX_CAPTURES]lua.Match, } // match all captures @@ -244,7 +244,7 @@ test_captures :: proc(t: ^testing.T) { { haystack := " 233 hello dolly" pattern := "%s*(%d+)%s+(%S+)" - captures: [lua.MAXCAPTURES]lua.Match + captures: [lua.MAX_CAPTURES]lua.Match lua.find_aux(haystack, pattern, 0, false, &captures) cap1 := captures[1] cap2 := captures[2] @@ -304,7 +304,7 @@ test_gsub :: proc(t: ^testing.T) { test_gfind :: proc(t: ^testing.T) { haystack := "test1 123 test2 123 test3" pattern := "%w+" - captures: [lua.MAXCAPTURES]lua.Match + captures: [lua.MAX_CAPTURES]lua.Match s := &haystack output := [?]string { "test1", "123", "test2", "123", "test3" } index: int From 67c1b364c4e6305a6c2dc0dbe25ff8da5b88909c Mon Sep 17 00:00:00 2001 From: sir-w7 Date: Wed, 21 Dec 2022 07:25:13 -0800 Subject: [PATCH 09/18] Fixed memory leak in dir_darwin.odin. --- core/os/dir_darwin.odin | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/os/dir_darwin.odin b/core/os/dir_darwin.odin index 1f54ef1ab..7d0f2936d 100644 --- a/core/os/dir_darwin.odin +++ b/core/os/dir_darwin.odin @@ -14,11 +14,12 @@ read_dir :: proc(fd: Handle, n: int, allocator := context.allocator) -> (fi: []F dirpath: string dirpath, err = absolute_path_from_handle(fd) - if err != ERROR_NONE { return } + defer delete(dirpath) + n := n size := n if n <= 0 { From e5d0417a6cfb4c140daca76be8ddd298c72cfd90 Mon Sep 17 00:00:00 2001 From: skytrias Date: Wed, 21 Dec 2022 21:36:50 +0100 Subject: [PATCH 10/18] folder name changed --- core/text/{lua => match}/strlib.odin | 0 tests/core/Makefile | 8 +-- .../test_core_text_match.odin} | 50 +++++++++---------- 3 files changed, 29 insertions(+), 29 deletions(-) rename core/text/{lua => match}/strlib.odin (100%) rename tests/core/text/{lua/test_core_text_lua.odin => match/test_core_text_match.odin} (86%) diff --git a/core/text/lua/strlib.odin b/core/text/match/strlib.odin similarity index 100% rename from core/text/lua/strlib.odin rename to core/text/match/strlib.odin diff --git a/tests/core/Makefile b/tests/core/Makefile index 8a36f7ea3..478d6ae2c 100644 --- a/tests/core/Makefile +++ b/tests/core/Makefile @@ -2,7 +2,7 @@ ODIN=../../odin PYTHON=$(shell which python3) all: download_test_assets image_test compress_test strings_test hash_test crypto_test noise_test encoding_test \ - math_test linalg_glsl_math_test filepath_test reflect_test os_exit_test i18n_test lua_strlib_test c_libc_test + math_test linalg_glsl_math_test filepath_test reflect_test os_exit_test i18n_test match_test c_libc_test download_test_assets: $(PYTHON) download_assets.py @@ -49,8 +49,8 @@ os_exit_test: i18n_test: $(ODIN) run text/i18n -out:test_core_i18n -lua_strlib_test: - $(ODIN) run text/lua -out:test_core_lua_strlib +match_test: + $(ODIN) run text/match -out:test_core_match c_libc_test: - $(ODIN) run c/libc -out:test_core_libc \ No newline at end of file + $(ODIN) run c/libc -out:test_core_libc diff --git a/tests/core/text/lua/test_core_text_lua.odin b/tests/core/text/match/test_core_text_match.odin similarity index 86% rename from tests/core/text/lua/test_core_text_lua.odin rename to tests/core/text/match/test_core_text_match.odin index f6b6e78a8..79defb849 100644 --- a/tests/core/text/lua/test_core_text_lua.odin +++ b/tests/core/text/match/test_core_text_match.odin @@ -1,6 +1,6 @@ package test_strlib -import lua "core:text/lua" +import "core:text/match" import "core:testing" import "core:fmt" import "core:os" @@ -62,8 +62,8 @@ test_find :: proc(t: ^testing.T) { } for entry, i in ENTRIES { - matcher := lua.matcher_init(entry.s, entry.p, entry.offset) - start, end, ok := lua.matcher_find(&matcher) + matcher := match.matcher_init(entry.s, entry.p, entry.offset) + start, end, ok := match.matcher_find(&matcher) success := entry.match.ok == ok && start == entry.match.start && end == entry.match.end if failed(t, success) { @@ -179,8 +179,8 @@ test_match :: proc(t: ^testing.T) { } for entry, i in ENTRIES { - matcher := lua.matcher_init(entry.s, entry.p) - result, ok := lua.matcher_match(&matcher) + matcher := match.matcher_init(entry.s, entry.p) + result, ok := match.matcher_match(&matcher) success := entry.ok == ok && result == entry.result if failed(t, success) { @@ -196,12 +196,12 @@ test_match :: proc(t: ^testing.T) { test_captures :: proc(t: ^testing.T) { Temp :: struct { pattern: string, - captures: [lua.MAX_CAPTURES]lua.Match, + captures: [match.MAX_CAPTURES]match.Match, } // match all captures compare_captures :: proc(t: ^testing.T, test: ^Temp, haystack: string, comp: []string, loc := #caller_location) { - length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures) + length, err := match.find_aux(haystack, test.pattern, 0, false, &test.captures) if failed(t, len(comp) == length) { logf(t, "Captures Compare Failed -> Lengths %d != %d\n", len(comp), length) } @@ -218,7 +218,7 @@ test_captures :: proc(t: ^testing.T) { // match to expected results matches :: proc(t: ^testing.T, test: ^Temp, haystack: string, ok: bool, loc := #caller_location) { - length, err := lua.find_aux(haystack, test.pattern, 0, false, &test.captures) + length, err := match.find_aux(haystack, test.pattern, 0, false, &test.captures) result := length > 0 && err == .OK if failed(t, result == ok) { @@ -244,8 +244,8 @@ test_captures :: proc(t: ^testing.T) { { haystack := " 233 hello dolly" pattern := "%s*(%d+)%s+(%S+)" - captures: [lua.MAX_CAPTURES]lua.Match - lua.find_aux(haystack, pattern, 0, false, &captures) + captures: [match.MAX_CAPTURES]match.Match + match.find_aux(haystack, pattern, 0, false, &captures) cap1 := captures[1] cap2 := captures[2] text1 := haystack[cap1.byte_start:cap1.byte_end] @@ -265,28 +265,28 @@ gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) { @test test_gmatch :: proc(t: ^testing.T) { { - matcher := lua.matcher_init("testing this out 123", "%w+") + matcher := match.matcher_init("testing this out 123", "%w+") output := [?]string { "testing", "this", "out", "123" } - for match, index in lua.matcher_gmatch(&matcher) { + for match, index in match.matcher_gmatch(&matcher) { gmatch_check(t, index, output[:], match) } } { - matcher := lua.matcher_init("#afdde6", "%x%x") + matcher := match.matcher_init("#afdde6", "%x%x") output := [?]string { "af", "dd", "e6" } - for match, index in lua.matcher_gmatch(&matcher) { + for match, index in match.matcher_gmatch(&matcher) { gmatch_check(t, index, output[:], match) } } { - matcher := lua.matcher_init("testing outz captures yo outz outtz", "(out)z") + matcher := match.matcher_init("testing outz captures yo outz outtz", "(out)z") output := [?]string { "out", "out" } - for match, index in lua.matcher_gmatch(&matcher) { + for match, index in match.matcher_gmatch(&matcher) { gmatch_check(t, index, output[:], match) } } @@ -294,9 +294,9 @@ test_gmatch :: proc(t: ^testing.T) { @test test_gsub :: proc(t: ^testing.T) { - result := lua.gsub("testing123testing", "%d+", " sup ", context.temp_allocator) + result := match.gsub("testing123testing", "%d+", " sup ", context.temp_allocator) expect(t, result == "testing sup testing", "GSUB 0: failed") - result = lua.gsub("testing123testing", "%a+", "345", context.temp_allocator) + result = match.gsub("testing123testing", "%a+", "345", context.temp_allocator) expect(t, result == "345123345", "GSUB 1: failed") } @@ -304,12 +304,12 @@ test_gsub :: proc(t: ^testing.T) { test_gfind :: proc(t: ^testing.T) { haystack := "test1 123 test2 123 test3" pattern := "%w+" - captures: [lua.MAX_CAPTURES]lua.Match + captures: [match.MAX_CAPTURES]match.Match s := &haystack output := [?]string { "test1", "123", "test2", "123", "test3" } index: int - for word in lua.gfind(s, pattern, &captures) { + for word in match.gfind(s, pattern, &captures) { if failed(t, output[index] == word) { logf(t, "GFIND %d failed!\n", index) logf(t, "\t%s != %s\n", output[index], word) @@ -326,7 +326,7 @@ test_frontier :: proc(t: ^testing.T) { output: [3]string, } - call :: proc(data: rawptr, word: string, haystack: string, captures: []lua.Match) { + call :: proc(data: rawptr, word: string, haystack: string, captures: []match.Match) { temp := cast(^Temp) data if failed(temp.t, word == temp.output[temp.index]) { @@ -347,15 +347,15 @@ test_frontier :: proc(t: ^testing.T) { } // https://lua-users.org/wiki/FrontierPattern example taken from here - lua.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call) + match.gsub_with("THE (QUICK) brOWN FOx JUMPS", "%f[%a]%u+%f[%A]", &temp, call) } @test test_utf8 :: proc(t: ^testing.T) { - matcher := lua.matcher_init("恥ず べき恥 フク恥ロ", "%w+") + matcher := match.matcher_init("恥ず べき恥 フク恥ロ", "%w+") output := [?]string { "恥ず", "べき恥", "フク恥ロ" } - for match, index in lua.matcher_gmatch(&matcher) { + for match, index in match.matcher_gmatch(&matcher) { gmatch_check(t, index, output[:], match) } } @@ -363,7 +363,7 @@ test_utf8 :: proc(t: ^testing.T) { @test test_case_insensitive :: proc(t: ^testing.T) { { - pattern := lua.pattern_case_insensitive("test", 256, context.temp_allocator) + pattern := match.pattern_case_insensitive("test", 256, context.temp_allocator) goal := "[tT][eE][sS][tT]" if failed(t, pattern == goal) { From 94af3c288762129ae064b13f7979f3671bb5dcd3 Mon Sep 17 00:00:00 2001 From: skytrias Date: Wed, 21 Dec 2022 21:38:21 +0100 Subject: [PATCH 11/18] package name changed --- core/text/match/strlib.odin | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/text/match/strlib.odin b/core/text/match/strlib.odin index 12bb45aae..d299dc43d 100644 --- a/core/text/match/strlib.odin +++ b/core/text/match/strlib.odin @@ -1,4 +1,4 @@ -package strlib +package text_match import "core:runtime" import "core:unicode" From 63a0395a79505598ae89c756c7394765bd89c1aa Mon Sep 17 00:00:00 2001 From: skytrias Date: Wed, 21 Dec 2022 22:08:03 +0100 Subject: [PATCH 12/18] refactor SPECIALS_TABLE --- core/text/match/strlib.odin | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/core/text/match/strlib.odin b/core/text/match/strlib.odin index d299dc43d..b8c2861fa 100644 --- a/core/text/match/strlib.odin +++ b/core/text/match/strlib.odin @@ -558,13 +558,23 @@ push_captures :: proc( // SPECIALS := "^$*+?.([%-" // all special characters inside a small ascii array -SPECIALS_TABLE := [256]u8 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +SPECIALS_TABLE := [256]bool { + '^' = true, + '$' = true, + '*' = true, + '+' = true, + '?' = true, + '.' = true, + '(' = true, + '[' = true, + '%' = true, + '-' = true, +} // helper call to quick search for special characters index_special :: proc(text: string) -> int { - // TODO is this utf8 safe? for i in 0.. Date: Thu, 22 Dec 2022 01:22:31 +0100 Subject: [PATCH 13/18] Fixed issues with dir opening on macOS --- core/os/os_darwin.odin | 1 + 1 file changed, 1 insertion(+) diff --git a/core/os/os_darwin.odin b/core/os/os_darwin.odin index 4c32323ff..3c69740c2 100644 --- a/core/os/os_darwin.odin +++ b/core/os/os_darwin.odin @@ -350,6 +350,7 @@ open :: proc(path: string, flags: int = O_RDWR, mode: int = 0) -> (Handle, Errno when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 { if mode != 0 { + if mode != 0 && !is_dir_handle(handle) { err := fchmod(handle, cast(u16)mode) if err != 0 { _unix_close(handle) From fb562ea708b37ccfbbdba93afbbae5f1074f836b Mon Sep 17 00:00:00 2001 From: Platin21 Date: Thu, 22 Dec 2022 01:26:06 +0100 Subject: [PATCH 14/18] Adds error casting from last error if open fails --- core/os/os_darwin.odin | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/os/os_darwin.odin b/core/os/os_darwin.odin index 3c69740c2..c5addb2bb 100644 --- a/core/os/os_darwin.odin +++ b/core/os/os_darwin.odin @@ -345,16 +345,15 @@ open :: proc(path: string, flags: int = O_RDWR, mode: int = 0) -> (Handle, Errno cstr := strings.clone_to_cstring(path, context.temp_allocator) handle := _unix_open(cstr, i32(flags), u16(mode)) if handle == -1 { - return INVALID_HANDLE, 1 + return INVALID_HANDLE, cast(Errno)get_last_error() } when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 { - if mode != 0 { if mode != 0 && !is_dir_handle(handle) { err := fchmod(handle, cast(u16)mode) if err != 0 { _unix_close(handle) - return INVALID_HANDLE, 1 + return INVALID_HANDLE, cast(Errno)err } } } From b983ac548c0eb30c9be4b30a8d857a4ccebff4eb Mon Sep 17 00:00:00 2001 From: Platin21 Date: Thu, 22 Dec 2022 01:36:04 +0100 Subject: [PATCH 15/18] Moves check up and sets flag to rdonly if dir is opened.. --- core/os/os_darwin.odin | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/core/os/os_darwin.odin b/core/os/os_darwin.odin index c5addb2bb..b40edb410 100644 --- a/core/os/os_darwin.odin +++ b/core/os/os_darwin.odin @@ -342,21 +342,33 @@ get_last_error_string :: proc() -> string { } open :: proc(path: string, flags: int = O_RDWR, mode: int = 0) -> (Handle, Errno) { + isDir := is_dir_path(path) + flags := flags + if isDir { + /* + @INFO(Platin): To make it impossible to use the wrong flag for dir's + as you can't write to a dir only read which makes it fail to open + */ + flags = O_RDONLY + } + cstr := strings.clone_to_cstring(path, context.temp_allocator) handle := _unix_open(cstr, i32(flags), u16(mode)) if handle == -1 { return INVALID_HANDLE, cast(Errno)get_last_error() } -when ODIN_OS == .Darwin && ODIN_ARCH == .arm64 { - if mode != 0 && !is_dir_handle(handle) { + /* + @INFO(Platin): this is only done because O_CREATE for some reason fails to apply mode + should not happen if the handle is a directory + */ + if mode != 0 && !isDir { err := fchmod(handle, cast(u16)mode) if err != 0 { _unix_close(handle) return INVALID_HANDLE, cast(Errno)err } } -} return handle, 0 } From d904ae5191164f9fd8c1b1515400da12904aa933 Mon Sep 17 00:00:00 2001 From: hikari Date: Sat, 24 Dec 2022 08:27:15 +0200 Subject: [PATCH 16/18] Replaced opaque bit-shifts with readable constants for memory units --- core/mem/allocators.odin | 2 +- core/mem/mem.odin | 10 +++++----- core/mem/virtual/arena.odin | 4 ++-- core/runtime/core.odin | 6 ++++++ core/runtime/default_temporary_allocator.odin | 4 ++-- 5 files changed, 16 insertions(+), 10 deletions(-) diff --git a/core/mem/allocators.odin b/core/mem/allocators.odin index fc009621b..66da12959 100644 --- a/core/mem/allocators.odin +++ b/core/mem/allocators.odin @@ -153,7 +153,7 @@ scratch_allocator_proc :: proc(allocator_data: rawptr, mode: Allocator_Mode, s := (^Scratch_Allocator)(allocator_data) if s.data == nil { - DEFAULT_BACKING_SIZE :: 1<<22 + DEFAULT_BACKING_SIZE :: 4 * Megabyte if !(context.allocator.procedure != scratch_allocator_proc && context.allocator.data != allocator_data) { panic("cyclic initialization of the scratch allocator with itself") diff --git a/core/mem/mem.odin b/core/mem/mem.odin index f7be69adc..bc77ca287 100644 --- a/core/mem/mem.odin +++ b/core/mem/mem.odin @@ -3,11 +3,11 @@ package mem import "core:runtime" import "core:intrinsics" -Byte :: 1 -Kilobyte :: 1024 * Byte -Megabyte :: 1024 * Kilobyte -Gigabyte :: 1024 * Megabyte -Terabyte :: 1024 * Gigabyte +Byte :: runtime.Byte +Kilobyte :: runtime.Kilobyte +Megabyte :: runtime.Megabyte +Gigabyte :: runtime.Gigabyte +Terabyte :: runtime.Terabyte set :: proc "contextless" (data: rawptr, value: byte, len: int) -> rawptr { return runtime.memset(data, i32(value), len) diff --git a/core/mem/virtual/arena.odin b/core/mem/virtual/arena.odin index e901cf6f3..0ddb116fd 100644 --- a/core/mem/virtual/arena.odin +++ b/core/mem/virtual/arena.odin @@ -19,11 +19,11 @@ Arena :: struct { // 1 MiB should be enough to start with -DEFAULT_ARENA_STATIC_COMMIT_SIZE :: 1<<20 +DEFAULT_ARENA_STATIC_COMMIT_SIZE :: mem.Megabyte DEFAULT_ARENA_GROWING_MINIMUM_BLOCK_SIZE :: DEFAULT_ARENA_STATIC_COMMIT_SIZE // 1 GiB on 64-bit systems, 128 MiB on 32-bit systems by default -DEFAULT_ARENA_STATIC_RESERVE_SIZE :: 1<<30 when size_of(uintptr) == 8 else 1<<27 +DEFAULT_ARENA_STATIC_RESERVE_SIZE :: mem.Gigabyte when size_of(uintptr) == 8 else 128 * mem.Megabyte diff --git a/core/runtime/core.odin b/core/runtime/core.odin index 108609f78..a74bf4285 100644 --- a/core/runtime/core.odin +++ b/core/runtime/core.odin @@ -329,6 +329,12 @@ Allocator :: struct { data: rawptr, } +Byte :: 1 +Kilobyte :: 1024 * Byte +Megabyte :: 1024 * Kilobyte +Gigabyte :: 1024 * Megabyte +Terabyte :: 1024 * Gigabyte + // Logging stuff Logger_Level :: enum uint { diff --git a/core/runtime/default_temporary_allocator.odin b/core/runtime/default_temporary_allocator.odin index 176634ff9..b71cd103a 100644 --- a/core/runtime/default_temporary_allocator.odin +++ b/core/runtime/default_temporary_allocator.odin @@ -1,6 +1,6 @@ package runtime -DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 1<<22) +DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE: int : #config(DEFAULT_TEMP_ALLOCATOR_BACKING_SIZE, 4 * Megabyte) when ODIN_OS == .Freestanding || ODIN_OS == .JS || ODIN_DEFAULT_TO_NIL_ALLOCATOR { @@ -197,4 +197,4 @@ default_temp_allocator :: proc(allocator: ^Default_Temp_Allocator) -> Allocator procedure = default_temp_allocator_proc, data = allocator, } -} \ No newline at end of file +} From 1d6f7680a1abffbb28eb61d27a8cd7966635c3ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mikkel=20Hjortsh=C3=B8j?= Date: Sat, 24 Dec 2022 15:44:32 +0100 Subject: [PATCH 17/18] Update stale.yml Update stale action to *not* delete issues/PRs anymore and only mark them as stale, also update the version --- .github/workflows/stale.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 341a09409..10f193056 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Close Stale Issues - uses: actions/stale@v4.1.0 + uses: actions/stale@v7.0.0 with: # stale-issue-message: | # Hello! @@ -36,7 +36,7 @@ jobs: # The motivation for this automation is to help prioritize issues in the backlog and not ignore, reject, or belittle anyone.. days-before-stale: 120 - days-before-close: 30 + days-before-close: -1 exempt-draft-pr: true ascending: true operations-per-run: 1000 From 0bb93d40d3fb5bd973dccdea75842ceb7cab0a09 Mon Sep 17 00:00:00 2001 From: Phil Date: Thu, 29 Dec 2022 16:10:13 -0800 Subject: [PATCH 18/18] fixup are_types_identical for comparing procs and checking if parameter names differ --- src/types.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/types.cpp b/src/types.cpp index 28628fd97..b18ba84c2 100644 --- a/src/types.cpp +++ b/src/types.cpp @@ -2775,8 +2775,8 @@ bool are_types_identical_internal(Type *x, Type *y, bool check_tuple_names) { x->Proc.variadic == y->Proc.variadic && x->Proc.diverging == y->Proc.diverging && x->Proc.optional_ok == y->Proc.optional_ok && - are_types_identical(x->Proc.params, y->Proc.params) && - are_types_identical(x->Proc.results, y->Proc.results); + are_types_identical_internal(x->Proc.params, y->Proc.params, check_tuple_names) && + are_types_identical_internal(x->Proc.results, y->Proc.results, check_tuple_names); } break;