Update package path and fix bugs; add path.match which uses shell pattern matching

This commit is contained in:
gingerBill
2020-09-25 11:43:51 +01:00
parent 654b24e514
commit b42c7f9161
4 changed files with 444 additions and 75 deletions

205
core/path/match.odin Normal file
View File

@@ -0,0 +1,205 @@
package path
import "core:strings"
import "core:unicode/utf8"
// Match_Error is the error value returned alongside the result of match
Match_Error :: enum {
// No error occurred
None,
// The pattern is malformed (e.g. a trailing '\\' or an unterminated/empty '[...]' class)
Syntax_Error,
}
// match reports whether "name" matches the shell pattern "pattern"
// Pattern syntax is:
// pattern:
// {term}
// term:
// '*' matches any sequence of non-/ characters
// '?' matches any single non-/ character
// '[' ['^'] { character-range } ']'
// character classification (cannot be empty)
// c matches character c (c != '*', '?', '\\', '[')
// '\\' c matches character c
//
// character-range
// c matches character c (c != '\\', '-', ']')
// '\\' c matches character c
// lo '-' hi matches character c for lo <= c <= hi
//
// match requires that the pattern matches the entirety of the name, not just a substring
// The only possible error returned is .Syntax_Error
//
// NOTE(bill): This is effectively the shell pattern matching system found
// in command-line shells (often called "globbing")
//
// NOTE(review): a malformed pattern is only reported when the malformed part is
// actually examined while matching; once a mismatch is found the rest of the
// pattern is not scanned and the result is (false, .None)
//
match :: proc(pattern, name: string) -> (matched: bool, err: Match_Error) {
// Shadow the parameters so they can be advanced while matching
pattern, name := pattern, name;
pattern_loop: for len(pattern) > 0 {
star: bool;
chunk: string;
// Peel the next star-delimited chunk off the front of the pattern
star, chunk, pattern = scan_chunk(pattern);
if star && chunk == "" {
// A trailing '*' matches the rest of the name unless it contains a '/'
return !strings.contains(name, "/"), .None;
}
t: string;
ok: bool;
// Look for a match at the current position
t, ok, err = match_chunk(chunk, name);
// If this is the last chunk the name must be exhausted, otherwise
// fall through so that a leading star gets the chance to absorb more
if ok && (len(t) == 0 || len(pattern) > 0) {
name = t;
continue;
}
if err != .None {
return;
}
if star {
// Retry with the star absorbing i+1 bytes of the name; a star can never absorb '/'
for i := 0; i < len(name) && name[i] != '/'; i += 1 {
t, ok, err = match_chunk(chunk, name[i+1:]);
if ok {
// The last chunk must consume the whole name, so keep looking
if len(pattern) == 0 && len(t) > 0 {
continue;
}
name = t;
continue pattern_loop;
}
if err != .None {
return;
}
}
}
// No position worked for this chunk
return false, .None;
}
// The pattern is used up; it is a match only if the name is used up as well
return len(name) == 0, .None;
}
// scan_chunk peels the next chunk off the front of "pattern"
//
// Every leading '*' is consumed and "star" reports whether there was at least one
// "chunk" is the text that follows, up to but excluding the next '*' that is not
// inside a '[...]' class, and "rest" is whatever remains after "chunk"
@(private="file")
scan_chunk :: proc(pattern: string) -> (star: bool, chunk, rest: string) {
	// Any number of leading stars behaves like a single one
	start := 0;
	for start < len(pattern) && pattern[start] == '*' {
		start += 1;
	}
	star = start > 0;
	tail := pattern[start:];

	in_class := false;
	end := 0;
	for end < len(tail) {
		c := tail[end];
		// An unbracketed star terminates the chunk and is left in "rest"
		if c == '*' && !in_class {
			break;
		}
		switch c {
		case '\\':
			// Step over the escaped byte; a trailing backslash is not rejected here
			if end+1 < len(tail) {
				end += 1;
			}
		case '[':
			in_class = true;
		case ']':
			in_class = false;
		}
		end += 1;
	}
	return star, tail[:end], tail[end:];
}
// match_chunk checks whether "chunk" matches the beginning of "s"
// "chunk" is expected to contain no unbracketed '*' (scan_chunk splits on those)
//
// On success it returns the unmatched remainder of "s" with ok = true
// On a mismatch it returns ok = false with err = .None
// On a malformed chunk it returns err = .Syntax_Error
//
// NOTE(review): when "s" runs out before "chunk" does, the procedure returns
// immediately without inspecting the rest of "chunk", so a syntax error in that
// remainder is not reported
@(private="file")
match_chunk :: proc(chunk, s: string) -> (rest: string, ok: bool, err: Match_Error) {
// Shadow the parameters so they can be advanced while matching
chunk, s := chunk, s;
for len(chunk) > 0 {
if len(s) == 0 {
// Ran out of input: bare return yields ("", false, .None)
return;
}
switch chunk[0] {
case '[':
// Character class: consume one rune of the input and test it against every range
r, w := utf8.decode_rune_in_string(s);
s = s[w:];
chunk = chunk[1:];
// A leading '^' negates the class
is_negated := false;
if len(chunk) > 0 && chunk[0] == '^' {
is_negated = true;
chunk = chunk[1:];
}
match := false;
range_count := 0;
// Parse every range up to the closing ']'
for {
// ']' only closes the class once at least one range has been read
if len(chunk) > 0 && chunk[0] == ']' && range_count > 0 {
chunk = chunk[1:];
break;
}
lo, hi: rune;
if lo, chunk, err = get_escape(chunk); err != .None {
return;
}
hi = lo;
// get_escape reports an error when nothing follows the rune, so chunk is non-empty here
if chunk[0] == '-' {
if hi, chunk, err = get_escape(chunk[1:]); err != .None {
return;
}
}
if lo <= r && r <= hi {
match = true;
}
range_count += 1;
}
// Mismatch when a plain class did not match or a negated class did
if match == is_negated {
return;
}
case '?':
// '?' matches any single rune except '/'
if s[0] == '/' {
return;
}
_, w := utf8.decode_rune_in_string(s);
s = s[w:];
chunk = chunk[1:];
case '\\':
// Escape: drop the backslash and compare the following byte literally
chunk = chunk[1:];
if len(chunk) == 0 {
err = .Syntax_Error;
return;
}
fallthrough;
case:
// Literal byte comparison
if chunk[0] != s[0] {
return;
}
s = s[1:];
chunk = chunk[1:];
}
}
return s, true, .None;
}
// get_escape reads one, possibly backslash-escaped, rune from the front of a
// character class and returns it together with the text that follows it
//
// .Syntax_Error is reported when "chunk" is empty, starts with an unescaped '-'
// or ']', ends right after a backslash, starts with an invalid UTF-8 byte, or
// when nothing at all follows the rune (a class must still be closed by ']')
@(private="file")
get_escape :: proc(chunk: string) -> (r: rune, next_chunk: string, err: Match_Error) {
	if len(chunk) == 0 {
		return 0, "", .Syntax_Error;
	}
	switch chunk[0] {
	case '-', ']':
		return 0, "", .Syntax_Error;
	}

	body := chunk;
	if body[0] == '\\' {
		body = body[1:];
		if len(body) == 0 {
			return 0, "", .Syntax_Error;
		}
	}

	width: int;
	r, width = utf8.decode_rune_in_string(body);
	next_chunk = body[width:];
	// Both conditions still return the decoded rune and the remainder alongside the error
	if (r == utf8.RUNE_ERROR && width == 1) || len(next_chunk) == 0 {
		err = .Syntax_Error;
	}
	return;
}

View File

@@ -1,90 +1,210 @@
package path
import "core:strings"
import "core:runtime"
import "core:unicode/utf8"
// is_separator_byte reports whether "c" separates path elements
// '/' always does; '\\' does only when ODIN_OS is "windows"
is_separator_byte :: proc(c: byte) -> bool {
	if c == '/' {
		return true;
	}
	return c == '\\' && ODIN_OS == "windows";
}
// returns everything preceding the last path element
dir :: proc(path: string, new := false, allocator := context.allocator) -> string {
if path == "" {
return "";
// is_abs reports whether "path" is absolute
// A path starting with '/' is absolute on every platform
// When ODIN_OS is "windows", a drive letter followed by ':' and a separator
// (for example `C:\`) is accepted as well
is_abs :: proc(path: string) -> bool {
	if len(path) == 0 {
		return false;
	}
	if path[0] == '/' {
		return true;
	}
	when ODIN_OS == "windows" {
		// Needs at least the three bytes of "X:" plus a separator
		if len(path) >= 3 {
			drive := path[0];
			is_letter := ('A' <= drive && drive <= 'Z') || ('a' <= drive && drive <= 'z');
			if is_letter {
				return path[1] == ':' && is_separator_byte(path[2]);
			}
		}
	}
	return false;
}
// base returns the last element of path
// Trailing slashes are removed
// If the path is empty, it returns ".".
// If the path is all slashes, it returns "/"
base :: proc(path: string, new := false, allocator := context.allocator) -> (last_element: string) {
defer if new {
last_element = strings.clone(last_element, allocator);
}
for i := len(path) - 1; i >= 0; i -= 1 {
if path[i] == '/' || path[i] == '\\' {
if path[:i] == "" {
// path is root
return new ? strings.clone(SEPARATOR_STRING, allocator) : SEPARATOR_STRING;
} else {
return new ? strings.clone(path[:i], allocator) : path[:i];
if path == "" {
last_element = ".";
return;
}
path := path;
for len(path) > 0 && is_separator_byte(path[len(path)-1]) {
path = path[:len(path)-1];
}
if i := strings.last_index_any(path, OS_SEPARATORS); i >= 0 {
path = path[i+1:];
}
if path == "" {
last_element = "/";
} else {
last_element = path;
}
return;
}
// dir returns everything but the last element of "path", typically the path's directory
// The last element is dropped with split and what remains is passed through clean,
// which is what removes the trailing slashes
// If the path is empty, it returns "."
// If the path consists entirely of slashes followed by non-slash bytes, it returns a single slash
// In any other case, the returned path does not end in a slash
// The result comes from clean and is allocated with "allocator"
dir :: proc(path: string, allocator := context.allocator) -> string {
	parent, _ := split(path);
	cleaned := clean(parent, allocator);
	return cleaned;
}
// split cuts "path" in two immediately after its last separator
// "dir" receives everything up to and including that separator and "file" the rest
// Without any separator "dir" is empty and "file" is the whole path
// Both results are slices of "path" and always satisfy path = dir+file
split :: proc(path: string) -> (dir, file: string) {
	// last_index_any yields -1 when nothing is found, which puts the cut at 0
	cut := strings.last_index_any(path, OS_SEPARATORS) + 1;
	dir = path[:cut];
	file = path[cut:];
	return;
}
// split_elements breaks "path" apart at every separator listed in OS_SEPARATORS_ARRAY
// The splitting itself is delegated to strings.split_multi, which is handed "allocator"
// NOTE(review): the `true` argument is presumably split_multi's "skip empty" flag and the
// returned elements presumably alias "path" rather than copy it - confirm against core:strings
split_elements :: proc(path: string, allocator := context.allocator) -> []string {
	elements := strings.split_multi(path, OS_SEPARATORS_ARRAY, true, allocator);
	return elements;
}
// clean returns the shortest path name equivalent to path through lexical analysis only
// It applies the following rules iteratively until done:
//
// 1) replace multiple slashes with one
// 2) remove each . path name element
// 3) remove inner .. path name element
// 4) remove .. that begin a rooted path ("/.." becomes "/")
//
// An empty path, or one that reduces to nothing, yields "."
// Separators are recognised with is_separator_byte but always written out as '/'
// The result is always a fresh allocation made with "allocator"
//
clean :: proc(path: string, allocator := context.allocator) -> string {
// Every allocation below (clone, Lazy_Buffer growth) goes through the requested allocator
context.allocator = allocator;
if path == "" {
return strings.clone(".");
}
// NOTE(bill): do not use is_separator_byte because window paths do not follow this convention
rooted := path[0] == '/';
n := len(path);
// Output buffer that only allocates once the output diverges from the input
out := &Lazy_Buffer{s = path};
// r is the index of the next byte to read from path
// dot_dot is the write index below which a ".." may not backtrack, either
// because of the leading slash or because of a leading ../../.. prefix
r, dot_dot := 0, 0;
if rooted {
lazy_buffer_append(out, '/');
r, dot_dot = 1, 1;
}
for r < n {
switch {
case is_separator_byte(path[r]):
// Empty path element
r += 1;
case path[r] == '.' && (r+1 == n || is_separator_byte(path[r+1])):
// "." element
r += 1;
case path[r] == '.' && path[r+1] == '.' && (r+2 == n || is_separator_byte(path[r+2])):
// ".." element: drop the previously written element if there is one
r += 2;
switch {
case out.w > dot_dot:
// Can backtrack: rewind the write index to the previous separator
out.w -= 1;
for out.w > dot_dot && !is_separator_byte(lazy_buffer_index(out, out.w)) {
out.w -= 1;
}
case !rooted:
// Cannot backtrack, but the path is not rooted, so the ".." is kept
if out.w > 0 {
lazy_buffer_append(out, '/');
}
lazy_buffer_append(out, '.');
lazy_buffer_append(out, '.');
dot_dot = out.w;
}
case:
// Real path element: add a separator unless this is the first element written
if rooted && out.w != 1 || !rooted && out.w != 0 {
lazy_buffer_append(out, '/');
}
// Copy the element itself
for ; r < n && !is_separator_byte(path[r]); r += 1 {
lazy_buffer_append(out, path[r]);
}
}
}
// Nothing was written, so the path reduced to nothing: release any buffer and return "."
if out.w == 0 {
delete(out.b);
return strings.clone(".");
}
return lazy_buffer_string(out);
}
// join joins numerous path elements into a single path, separated by '/'
// Leading empty elements are ignored and the joined text is passed through clean
// If every element is empty (or there are none), "" is returned and nothing is allocated
// Otherwise the result is allocated with "allocator"
join :: proc(elems: ..string, allocator := context.allocator) -> string {
	// strings.join, clean and delete below all pick up the requested allocator
	context.allocator = allocator;
	for elem, i in elems {
		if elem != "" {
			joined := strings.join(elems[i:], "/");
			// clean never returns its argument: the result is either a clone or a
			// separate buffer, so the intermediate string must be released here
			defer delete(joined);
			return clean(joined);
		}
	}
	return "";
}
// returns the final path element
base :: proc(path: string, new := false, allocator := context.allocator) -> string {
if path == "" {
return "";
}
end := len(path) - 1;
for i := end; i >= 0; i -= 1 {
switch path[i] {
case '/', '\\':
if i != end {
return new ? strings.clone(path[i+1:], allocator) : path[i+1:];
} else {
end = i; // we don't want trailing slashes
}
}
}
// path doesn't contain any folder structure, return entire path
return new ? strings.clone(path, allocator) : path;
}
// returns the final path element, excluding the file extension if there is one
name :: proc(path: string, new := false, allocator := context.allocator) -> string {
if path == "" {
return "";
}
end := len(path) - 1;
dot := end;
for i := end; i >= 0; i -= 1 {
switch path[i] {
case '.': dot = (dot == end ? i : dot);
case '/', '\\': return new ? strings.clone(path[i+1:dot], allocator) : path[i+1:dot];
}
}
// path doesn't contain any folder structure or file extensions; assumed to be a valid file name
return new ? strings.clone(path, allocator) : path;
}
// returns the file extension, if there is one
// ext returns the file name extension used by "path"
// The extension is the suffix beginning at the final dot in the last slash separated element of "path"
// The extension is empty if there is no dot
ext :: proc(path: string, new := false, allocator := context.allocator) -> string {
if path == "" {
return "";
}
for i := len(path)-1; i >= 0; i -= 1 {
switch path[i] {
case '/', '\\': return "";
case '.': return new ? strings.clone(path[i+1:], allocator) : path[i+1:];
for i := len(path)-1; i >= 0 && !is_separator_byte(path[i]); i -= 1 {
if path[i] == '.' {
res := path[i:];
if new {
res = strings.clone(res, allocator);
}
return res;
}
}
// path does not include a file extension
return "";
}
// name returns the last element of "path" with its extension removed
// The last element is the "file" half of split(path) and the extension is
// everything from the final '.' of that element onwards
// Without a '.', the whole element is returned
// When "new" is true the deferred block replaces the result with a clone made
// with "allocator"; otherwise the result is a slice of "path"
name :: proc(path: string, new := false, allocator := context.allocator) -> (name: string) {
	_, file := split(path);
	name = file;
	// Deferred so that it runs on the way out of every return below
	defer if new {
		name = strings.clone(name, allocator);
	}

	// Walk backwards looking for the final dot
	for dot := len(file)-1; dot >= 0 && !is_separator_byte(file[dot]); dot -= 1 {
		if file[dot] != '.' {
			continue;
		}
		name = file[:dot];
		return;
	}
	return file;
}
rel :: proc{rel_between, rel_current};
@@ -177,7 +297,7 @@ rel_between :: proc(from, to: string, allocator := context.allocator) -> string
buffer := make([]byte, 2 + len(to), allocator);
buffer[0] = '.';
buffer[1] = SEPARATOR;
buffer[1] = '/';
copy(buffer[2:], to);
return string(buffer);
@@ -188,7 +308,7 @@ rel_between :: proc(from, to: string, allocator := context.allocator) -> string
for i in 0..<from_slashes {
buffer[i*3+0] = '.';
buffer[i*3+1] = '.';
buffer[i*3+2] = SEPARATOR;
buffer[i*3+2] = '/';
}
copy(buffer[from_slashes*3:], to);
@@ -205,7 +325,47 @@ rel_current :: proc(to: string, allocator := context.allocator) -> string {
}
// splits the path elements into slices of the original path string
split :: proc(s: string, allocator := context.allocator) -> []string {
return strings.split_multi(s, []string{"\\", "/"}, true, allocator);
/*
Lazy_Buffer is a lazily made path buffer
It starts out as a view over the string "s" and only allocates "b" once a byte
is appended that differs from the byte already at that position in "s"
When it does allocate, it uses the context.allocator
*/
@(private)
Lazy_Buffer :: struct {
s: string, // original string the output is compared against
b: []byte, // output bytes; nil until the output first differs from s
w: int, // write index: number of output bytes produced so far
}
// lazy_buffer_index returns output byte "i" of the buffer
// Until the buffer has allocated, the output is still identical to the source
// string, so the byte is read from there instead
@(private)
lazy_buffer_index :: proc(lb: ^Lazy_Buffer, i: int) -> byte {
	if lb.b == nil {
		return lb.s[i];
	}
	return lb.b[i];
}
// lazy_buffer_append writes "c" at the write index and advances it
// While nothing has been allocated and "c" equals the source byte at that
// position, only the index moves
// The first differing byte allocates a buffer as long as the source string
// (through context.allocator) and copies over what has been written so far
@(private)
lazy_buffer_append :: proc(lb: ^Lazy_Buffer, c: byte) {
	still_identical := lb.b == nil && lb.w < len(lb.s) && lb.s[lb.w] == c;
	if !still_identical {
		if lb.b == nil {
			// First divergence from the source: materialise the output
			lb.b = make([]byte, len(lb.s));
			copy(lb.b, lb.s[:lb.w]);
		}
		lb.b[lb.w] = c;
	}
	lb.w += 1;
}
// lazy_buffer_string returns the bytes written so far as a string
// If the buffer has allocated, the result is a view of that allocation
// Otherwise the written prefix of the source string is cloned, so the result
// never aliases the source
@(private)
lazy_buffer_string :: proc(lb: ^Lazy_Buffer) -> string {
	if lb.b != nil {
		return string(lb.b[:lb.w]);
	}
	return strings.clone(lb.s[:lb.w]);
}

View File

@@ -9,9 +9,11 @@ import "core:strings"
MAX :: 4096; // @note(bp): apparently PATH_MAX is bullshit
SEPARATOR :: '/';
SEPARATOR_STRING :: "/";
// OS_SEPARATOR is the path separator character for the platform this file targets
OS_SEPARATOR :: '/';
// OS_SEPARATOR_STRING is OS_SEPARATOR as a string
OS_SEPARATOR_STRING :: "/";
// OS_SEPARATORS lists every separator byte in one string (a character set for strings.last_index_any)
OS_SEPARATORS :: `/`;
// OS_SEPARATORS_ARRAY lists every separator as its own string (a substring list for strings.split_multi)
OS_SEPARATORS_ARRAY :: []string{`/`};
@(private)
null_term :: proc(str: string) -> string {

View File

@@ -4,9 +4,11 @@ import "core:strings"
import win32 "core:sys/windows"
SEPARATOR :: '\\';
SEPARATOR_STRING :: "\\";
// OS_SEPARATOR is the path separator character for the platform this file targets
// NOTE(review): presumably Windows, given the core:sys/windows import in this file - confirm
OS_SEPARATOR :: '\\';
// OS_SEPARATOR_STRING is OS_SEPARATOR as a string
OS_SEPARATOR_STRING :: "\\";
// OS_SEPARATORS lists every separator byte in one string (a character set for strings.last_index_any)
// Both '/' and '\' are accepted here
OS_SEPARATORS :: `/\`;
// OS_SEPARATORS_ARRAY lists every separator as its own string (a substring list for strings.split_multi)
OS_SEPARATORS_ARRAY :: []string{`/`, `\`};
@(private)
null_term :: proc "contextless" (str: string) -> string {