diff --git a/core/path/match.odin b/core/path/match.odin new file mode 100644 index 000000000..555c1b05c --- /dev/null +++ b/core/path/match.odin @@ -0,0 +1,205 @@ +package path + +import "core:strings" +import "core:unicode/utf8" + +Match_Error :: enum { + None, + Syntax_Error, +} + +// match reports whether "name" matches the shell pattern +// Pattern syntax is: +// pattern: +// {term} +// term: +// '*' matches any sequence of non-/ characters +// '?' matches any single non-/ character +// '[' ['^'] { character-range } ']' +// character class (must be non-empty) +// c matches character c (c != '*', '?', '\\', '[') +// '\\' c matches character c +// +// character-range +// c matches character c (c != '\\', '-', ']') +// '\\' c matches character c +// lo '-' hi matches character c for lo <= c <= hi +// +// match requires that the pattern matches the entirety of the name, not just a substring +// The only possible error returned is .Syntax_Error +// +// NOTE(bill): This is effectively the shell (glob-style) pattern matching system +// +match :: proc(pattern, name: string) -> (matched: bool, err: Match_Error) { + pattern, name := pattern, name; + pattern_loop: for len(pattern) > 0 { + star: bool; + chunk: string; + star, chunk, pattern = scan_chunk(pattern); + if star && chunk == "" { + return !strings.contains(name, "/"), .None; + } + + t: string; + ok: bool; + t, ok, err = match_chunk(chunk, name); + + if ok && (len(t) == 0 || len(pattern) > 0) { + name = t; + continue; + } + if err != .None { + return; + } + if star { + for i := 0; i < len(name) && name[i] != '/'; i += 1 { + t, ok, err = match_chunk(chunk, name[i+1:]); + if ok { + if len(pattern) == 0 && len(t) > 0 { + continue; + } + name = t; + continue pattern_loop; + } + if err != .None { + return; + } + } + } + + return false, .None; + } + + return len(name) == 0, .None; +} + + +@(private="file") +scan_chunk :: proc(pattern: string) -> (star: bool, chunk, rest: string) { + pattern := pattern; + for
len(pattern) > 0 && pattern[0] == '*' { + pattern = pattern[1:]; + star = true; + } + in_range := false; + i: int; + + scan_loop: for i = 0; i < len(pattern); i += 1 { + switch pattern[i] { + case '\\': + if i+1 < len(pattern) { + i += 1; + } + case '[': + in_range = true; + case ']': + in_range = false; + case '*': + if !in_range { + break scan_loop; + } + + } + } + return star, pattern[:i], pattern[i:]; +} + +@(private="file") +match_chunk :: proc(chunk, s: string) -> (rest: string, ok: bool, err: Match_Error) { + chunk, s := chunk, s; + for len(chunk) > 0 { + if len(s) == 0 { + return; + } + switch chunk[0] { + case '[': + r, w := utf8.decode_rune_in_string(s); + s = s[w:]; + chunk = chunk[1:]; + is_negated := false; + if len(chunk) > 0 && chunk[0] == '^' { + is_negated = true; + chunk = chunk[1:]; + } + match := false; + range_count := 0; + for { + if len(chunk) > 0 && chunk[0] == ']' && range_count > 0 { + chunk = chunk[1:]; + break; + } + lo, hi: rune; + if lo, chunk, err = get_escape(chunk); err != .None { + return; + } + hi = lo; + if chunk[0] == '-' { + if hi, chunk, err = get_escape(chunk[1:]); err != .None { + return; + } + } + + if lo <= r && r <= hi { + match = true; + } + range_count += 1; + } + if match == is_negated { + return; + } + + case '?': + if s[0] == '/' { + return; + } + _, w := utf8.decode_rune_in_string(s); + s = s[w:]; + chunk = chunk[1:]; + + case '\\': + chunk = chunk[1:]; + if len(chunk) == 0 { + err = .Syntax_Error; + return; + } + fallthrough; + case: + if chunk[0] != s[0] { + return; + } + s = s[1:]; + chunk = chunk[1:]; + + } + } + return s, true, .None; +} + +@(private="file") +get_escape :: proc(chunk: string) -> (r: rune, next_chunk: string, err: Match_Error) { + if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' { + err = .Syntax_Error; + return; + } + chunk := chunk; + if chunk[0] == '\\' { + chunk = chunk[1:]; + if len(chunk) == 0 { + err = .Syntax_Error; + return; + } + } + + w: int; + r, w =
utf8.decode_rune_in_string(chunk); + if r == utf8.RUNE_ERROR && w == 1 { + err = .Syntax_Error; + } + + next_chunk = chunk[w:]; + if len(next_chunk) == 0 { + err = .Syntax_Error; + } + + return; +} diff --git a/core/path/path.odin b/core/path/path.odin index e28e11fde..42a5e73ce 100644 --- a/core/path/path.odin +++ b/core/path/path.odin @@ -1,90 +1,210 @@ package path import "core:strings" +import "core:runtime" import "core:unicode/utf8" +// is_separator_byte checks whether the byte is a valid separator character +is_separator_byte :: proc(c: byte) -> bool { + switch c { + case '/': return true; + case '\\': return ODIN_OS == "windows"; + } + return false; +} -// returns everything preceding the last path element -dir :: proc(path: string, new := false, allocator := context.allocator) -> string { - if path == "" { - return ""; + +// is_abs checks whether the path is absolute +is_abs :: proc(path: string) -> bool { + if len(path) > 0 && path[0] == '/' { + return true; + } + when ODIN_OS == "windows" { + if len(path) > 2 { + switch path[0] { + case 'A'..'Z', 'a'..'z': + return path[1] == ':' && is_separator_byte(path[2]); + } + } + } + return false; +} + + +// base returns the last element of path +// Trailing slashes are removed +// If the path is empty, it returns ".". +// If the path is all slashes, it returns "/" +base :: proc(path: string, new := false, allocator := context.allocator) -> (last_element: string) { + defer if new { + last_element = strings.clone(last_element, allocator); } - for i := len(path) - 1; i >= 0; i -= 1 { - if path[i] == '/' || path[i] == '\\' { - if path[:i] == "" { - // path is root - return new ? strings.clone(SEPARATOR_STRING, allocator) : SEPARATOR_STRING; - } else { - return new ?
strings.clone(path[:i], allocator) : path[:i]; + if path == "" { + last_element = "."; + return; + } + + + path := path; + + for len(path) > 0 && is_separator_byte(path[len(path)-1]) { + path = path[:len(path)-1]; + } + if i := strings.last_index_any(path, OS_SEPARATORS); i >= 0 { + path = path[i+1:]; + } + + if path == "" { + last_element = "/"; + } else { + last_element = path; + } + return; +} + +// dir returns all but the last element of path, typically the path's directory. +// After dropping the final element using split, the path is cleaned and trailing slashes are removed +// If the path is empty, it returns "." +// If the path consists entirely of slashes followed by non-slash bytes, it returns a single slash +// In any other case, the returned path does not end in a slash +dir :: proc(path: string, allocator := context.allocator) -> string { + directory, _ := split(path); + return clean(directory, allocator); +} + + +// split splits path immediately following the last slash, +// separating it into a directory and file name component. +// If there is no slash in path, it returns an empty dir and file set to path +// The returned values have the property that path = dir+file +split :: proc(path: string) -> (dir, file: string) { + i := strings.last_index_any(path, OS_SEPARATORS); + return path[:i+1], path[i+1:]; +} + +// split_elements splits the path elements into slices of the original path string +split_elements :: proc(path: string, allocator := context.allocator) -> []string { + return strings.split_multi(path, OS_SEPARATORS_ARRAY, true, allocator); +} + +// clean returns the shortest path name equivalent to path through lexical analysis only +// It applies the following rules iteratively until done: +// +// 1) replace multiple slashes with one +// 2) remove each . path name element +// 3) remove each inner .. path name element along with the non-.. element that precedes it +// 4) remove .. that begin a rooted path ("/.."
becomes "/") +// +clean :: proc(path: string, allocator := context.allocator) -> string { + context.allocator = allocator; + + if path == "" { + return strings.clone("."); + } + + // NOTE(bill): do not use is_separator_byte because Windows paths do not follow this convention + rooted := path[0] == '/'; + n := len(path); + + out := &Lazy_Buffer{s = path}; + + // Check for ../../.. prefixes + r, dot_dot := 0, 0; + if rooted { + lazy_buffer_append(out, '/'); + r, dot_dot = 1, 1; + } + + for r < n { + switch { + case is_separator_byte(path[r]): + r += 1; + case path[r] == '.' && (r+1 == n || is_separator_byte(path[r+1])): + r += 1; + case path[r] == '.' && path[r+1] == '.' && (r+2 == n || is_separator_byte(path[r+2])): + r += 2; + switch { + case out.w > dot_dot: + out.w -= 1; + for out.w > dot_dot && !is_separator_byte(lazy_buffer_index(out, out.w)) { + out.w -= 1; + } + + case !rooted: + if out.w > 0 { + lazy_buffer_append(out, '/'); + } + lazy_buffer_append(out, '.'); + lazy_buffer_append(out, '.'); + dot_dot = out.w; + } + case: + if rooted && out.w != 1 || !rooted && out.w != 0 { + lazy_buffer_append(out, '/'); + } + for ; r < n && !is_separator_byte(path[r]); r += 1 { + lazy_buffer_append(out, path[r]); } } } - // path doesn't contain any folder structure + if out.w == 0 { + delete(out.b); + return strings.clone("."); + } + + return lazy_buffer_string(out); +} + +// join joins numerous path elements into a single path, starting at the first non-empty element, and cleans the result; it returns "" if every element is empty. The intermediate joined string is made with context.temp_allocator so that only the cleaned result is allocated with "allocator" +join :: proc(elems: ..string, allocator := context.allocator) -> string { + context.allocator = allocator; + for elem, i in elems { + if elem != "" { + s := strings.join(elems[i:], "/", context.temp_allocator); + return clean(s); + } + } return ""; } -// returns the final path element -base :: proc(path: string, new := false, allocator := context.allocator) -> string { - if path == "" { - return ""; - } - - end := len(path) - 1; - - for i := end; i >= 0; i -= 1 { - switch path[i] { - case '/', '\\': - if i != end { - return new ?
strings.clone(path[i+1:], allocator) : path[i+1:]; - } else { - end = i; // we don't want trailing slashes - } - } - } - - // path doesn't contain any folder structure, return entire path - return new ? strings.clone(path, allocator) : path; -} - -// returns the final path element, excluding the file extension if there is one -name :: proc(path: string, new := false, allocator := context.allocator) -> string { - if path == "" { - return ""; - } - - end := len(path) - 1; - dot := end; - - for i := end; i >= 0; i -= 1 { - switch path[i] { - case '.': dot = (dot == end ? i : dot); - case '/', '\\': return new ? strings.clone(path[i+1:dot], allocator) : path[i+1:dot]; - } - } - - // path doesn't contain any folder structure or file extensions; assumed to be a valid file name - return new ? strings.clone(path, allocator) : path; -} - -// returns the file extension, if there is one +// ext returns the file name extension used by "path" +// The extension is the suffix beginning at the final dot in the last separator-delimited element of "path" (the dot is included) +// The extension is empty if there is no dot ext :: proc(path: string, new := false, allocator := context.allocator) -> string { - if path == "" { - return ""; - } - - for i := len(path)-1; i >= 0; i -= 1 { - switch path[i] { - case '/', '\\': return ""; - case '.': return new ? strings.clone(path[i+1:], allocator) : path[i+1:]; + for i := len(path)-1; i >= 0 && !is_separator_byte(path[i]); i -= 1 { + if path[i] == '.' { + res := path[i:]; + if new { + res = strings.clone(res, allocator); + } + return res; } } - - // path does not include a file extension return ""; } +// name returns the file name (the part of path after the last separator) without its extension +name :: proc(path: string, new := false, allocator := context.allocator) -> (name: string) { + _, file := split(path); + name = file; + + defer if new { + name = strings.clone(name, allocator); + } + + for i := len(file)-1; i >= 0 && !is_separator_byte(file[i]); i -= 1 { + if file[i] == '.'
{ + name = file[:i]; + return; + } + } + return file; + +} + + rel :: proc{rel_between, rel_current}; @@ -177,7 +297,7 @@ rel_between :: proc(from, to: string, allocator := context.allocator) -> string buffer := make([]byte, 2 + len(to), allocator); buffer[0] = '.'; - buffer[1] = SEPARATOR; + buffer[1] = '/'; copy(buffer[2:], to); return string(buffer); @@ -188,7 +308,7 @@ rel_between :: proc(from, to: string, allocator := context.allocator) -> string for i in 0.. string { } -// splits the path elements into slices of the original path string -split :: proc(s: string, allocator := context.allocator) -> []string { - return strings.split_multi(s, []string{"\\", "/"}, true, allocator); + + + + + + +/* + Lazy_Buffer is a lazily made path buffer + When it does allocate, it uses the context.allocator + */ +@(private) +Lazy_Buffer :: struct { + s: string, + b: []byte, + w: int, // write index +} + +@(private) +lazy_buffer_index :: proc(lb: ^Lazy_Buffer, i: int) -> byte { + if lb.b != nil { + return lb.b[i]; + } + return lb.s[i]; +} +@(private) +lazy_buffer_append :: proc(lb: ^Lazy_Buffer, c: byte) { + if lb.b == nil { + if lb.w < len(lb.s) && lb.s[lb.w] == c { + lb.w += 1; + return; + } + lb.b = make([]byte, len(lb.s)); + copy(lb.b, lb.s[:lb.w]); + } + lb.b[lb.w] = c; + lb.w += 1; +} +@(private) +lazy_buffer_string :: proc(lb: ^Lazy_Buffer) -> string { + if lb.b == nil { + return strings.clone(lb.s[:lb.w]); + } + return string(lb.b[:lb.w]); } diff --git a/core/path/path_unix.odin b/core/path/path_unix.odin index 29c4c3da6..b380d8187 100644 --- a/core/path/path_unix.odin +++ b/core/path/path_unix.odin @@ -9,9 +9,11 @@ import "core:strings" MAX :: 4096; // @note(bp): apparently PATH_MAX is bullshit -SEPARATOR :: '/'; -SEPARATOR_STRING :: "/"; +OS_SEPARATOR :: '/'; +OS_SEPARATOR_STRING :: "/"; +OS_SEPARATORS :: `/`; +OS_SEPARATORS_ARRAY :: []string{`/`}; @(private) null_term :: proc(str: string) -> string { diff --git a/core/path/path_windows.odin
b/core/path/path_windows.odin index e38d9087f..bc0360880 100644 --- a/core/path/path_windows.odin +++ b/core/path/path_windows.odin @@ -4,9 +4,11 @@ import "core:strings" import win32 "core:sys/windows" -SEPARATOR :: '\\'; -SEPARATOR_STRING :: "\\"; +OS_SEPARATOR :: '\\'; +OS_SEPARATOR_STRING :: "\\"; +OS_SEPARATORS :: `/\`; +OS_SEPARATORS_ARRAY :: []string{`/`, `\`}; @(private) null_term :: proc "contextless" (str: string) -> string {