From eb5523d5d3326fce3c0d00ca071051875715447f Mon Sep 17 00:00:00 2001
From: skytrias
Date: Thu, 1 Dec 2022 05:18:24 +0100
Subject: [PATCH] case insensitive helper call

---
Review notes (this section is commentary and is not part of the commit message):
 * pattern_case_insensitive_allocator now forwards its `allocator` argument to
   strings.builder_make; it previously always used context.temp_allocator and
   silently ignored the parameter.
 * pattern_case_insensitive_builder now clears the escape flag after "%%", so a
   letter following an escaped percent sign is expanded like any other letter.
 * Both fixes edit existing '+' lines in place, so hunk line counts and the
   diffstat are unchanged; the blob ids on the `index` lines are stale.

 core/text/lua/strlib.odin                   | 56 +++++++++++++++++++--
 tests/core/text/lua/test_core_text_lua.odin | 52 ++++++++++---------
 2 files changed, 81 insertions(+), 27 deletions(-)

diff --git a/core/text/lua/strlib.odin b/core/text/lua/strlib.odin
index 2d4543f75..ca95367e9 100644
--- a/core/text/lua/strlib.odin
+++ b/core/text/lua/strlib.odin
@@ -788,12 +788,23 @@ gsub_allocator :: proc(
 	return gsub_builder(&builder, haystack, pattern, replace)
 }
 
+Gsub_Proc :: proc(
+	// optional passed data
+	data: rawptr,
+	// word match found
+	word: string,
+	// current haystack for found captures
+	haystack: string,
+	// found captures - empty for no captures
+	captures: []Match,
+)
+
 // call a procedure on every match in the haystack
 gsub_with :: proc(
 	haystack: string,
 	pattern: string,
 	data: rawptr,
-	call: proc(data: rawptr, word: string),
+	call: Gsub_Proc,
 ) {
 	// find matches
 	captures: [MAXCAPTURES]Match
@@ -810,7 +821,7 @@ gsub_with :: proc(
 		cap := captures[0]
 
 		word := haystack[cap.byte_start:cap.byte_end]
-		call(data, word)
+		call(data, word, haystack, captures[1:length])
 
 		// advance string till end
 		haystack = haystack[cap.byte_end:]
@@ -837,4 +848,43 @@ gfind :: proc(
 	}
 
 	return
-}
\ No newline at end of file
+}
+
+// rebuilds a pattern into a case insensitive pattern
+pattern_case_insensitive_builder :: proc(
+	builder: ^strings.Builder,
+	pattern: string,
+) -> (res: string) {
+	p := pattern
+	last_percent: bool
+
+	for len(p) > 0 {
+		char, size := utf8.decode_rune_in_string(p)
+
+		if unicode.is_alpha(char) && !last_percent {
+			// write character class in manually
+			strings.write_byte(builder, '[')
+			strings.write_rune(builder, unicode.to_lower(char))
+			strings.write_rune(builder, unicode.to_upper(char))
+			strings.write_byte(builder, ']')
+		} else {
+			strings.write_rune(builder, char)
+		}
+
+		last_percent = char == L_ESC && !last_percent // "%%" is a complete escape, it must not escape the next rune
+		p = p[size:]
+	}
+
+	return strings.to_string(builder^)
+}
+
+pattern_case_insensitive_allocator :: proc(
+	pattern: string,
+	cap: int = 256,
+	allocator := context.allocator,
+) -> (res: string) {
+	builder := strings.builder_make(0, cap, allocator) // honour the caller supplied allocator
+	return pattern_case_insensitive_builder(&builder, pattern)
+}
+
+pattern_case_insensitive :: proc { pattern_case_insensitive_builder, pattern_case_insensitive_allocator }
\ No newline at end of file
diff --git a/tests/core/text/lua/test_core_text_lua.odin b/tests/core/text/lua/test_core_text_lua.odin
index 630631fc2..ed7d6c58f 100644
--- a/tests/core/text/lua/test_core_text_lua.odin
+++ b/tests/core/text/lua/test_core_text_lua.odin
@@ -261,14 +261,15 @@ test_captures :: proc(t: ^testing.T) {
 	}
 }
 
+gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) {
+	if failed(t, a[index] == b) {
+		logf(t, "GMATCH %d failed!\n", index)
+		logf(t, "\t%s != %s\n", a[index], b)
+	}
+}
+
 @test
 test_gmatch :: proc(t: ^testing.T) {
-	gmatch_check :: proc(t: ^testing.T, index: int, a: []string, b: string) {
-		if failed(t, a[index] == b) {
-			logf(t, "GMATCH %d failed!\n", index)
-			logf(t, "\t%s != %s\n", a[index], b)
-		}
-	}
 
 	{
 		haystack := "testing this out 123"
@@ -347,7 +348,7 @@ test_frontier :: proc(t: ^testing.T) {
 		output: [3]string,
 	}
 
-	call :: proc(data: rawptr, word: string) {
+	call :: proc(data: rawptr, word: string, haystack: string, captures: []lua.Match) {
 		temp := cast(^Temp) data
 
 		if failed(temp.t, word == temp.output[temp.index]) {
@@ -373,28 +374,31 @@ test_frontier :: proc(t: ^testing.T) {
 
 @test
 test_utf8 :: proc(t: ^testing.T) {
-	// {
-	// 	haystack := "恥ずべき恥フク恥ロ"
-	// 	s := &haystack
-	// 	captures: [lua.MAXCAPTURES]lua.Match
-
-	// 	for word in lua.gmatch(s, "恥", &captures) {
-	// 		fmt.eprintln(word)
-	// 	}
-	// }
-
 	{
-		haystack := "恥ずべき恥フク恥ロ"
+		haystack := "恥ず べき恥 フク恥ロ"
 		s := &haystack
 		captures: [lua.MAXCAPTURES]lua.Match
+		output := [?]string { "恥ず", "べき恥", "フク恥ロ" }
+		index: int
 
-		for word in lua.gmatch(s, "w+", &captures) {
-			fmt.eprintln(word)
+		for word in lua.gmatch(s, "%w+", &captures) {
+			gmatch_check(t, index, output[:], word)
+			index += 1
 		}
 	}
+}
 
-	// captures: [MAXCAPTURES]Match
-	// length, err := lua.find_aux("damn, pattern,)
+@test
+test_case_insensitive :: proc(t: ^testing.T) {
+	{
+		pattern := lua.pattern_case_insensitive("test", 256, context.temp_allocator)
+		goal := "[tT][eE][sS][tT]"
+
+		if failed(t, pattern == goal) {
+			logf(t, "Case Insensitive Pattern doesn't match result\n")
+			logf(t, "\t%s != %s\n", pattern, goal)
+		}
+	}
 }
 
 main :: proc() {
@@ -410,8 +414,8 @@ main :: proc() {
 	test_gsub(&t)
 	test_gfind(&t)
 	test_frontier(&t)
-
-	// test_utf8(&t)
+	test_utf8(&t)
+	test_case_insensitive(&t)
 
 	fmt.wprintf(w, "%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count)
 	if TEST_fail > 0 {