Add more filepath to os2

This commit is contained in:
Jeroen van Rijn
2025-10-30 23:56:13 +01:00
parent af8bc8bbfc
commit cc50be1a6c
6 changed files with 493 additions and 419 deletions

View File

@@ -24,6 +24,7 @@ General_Error :: enum u32 {
Invalid_Command,
Pattern_Has_Separator,
Pattern_Syntax_Error, // Indicates an error in `glob` or `match` pattern.
No_HOME_Variable,
Env_Var_Not_Found,
@@ -74,6 +75,7 @@ error_string :: proc(ferr: Error) -> string {
case .Invalid_Callback: return "invalid callback"
case .Invalid_Command: return "invalid command"
case .Pattern_Has_Separator: return "pattern has separator"
case .Pattern_Syntax_Error: return "glob pattern syntax error"
case .No_HOME_Variable: return "no $HOME variable"
case .Env_Var_Not_Found: return "environment variable not found"
}

View File

@@ -1,11 +1,14 @@
package os2
import "base:runtime"
import "core:slice"
import "core:strings"
import "core:unicode/utf8"
Path_Separator :: _Path_Separator // OS-Specific
Path_Separator_String :: _Path_Separator_String // OS-Specific
Path_Separator_Chars :: `/\`
Path_List_Separator :: _Path_List_Separator // OS-Specific
#assert(_Path_Separator <= rune(0x7F), "The system-specific path separator rune is expected to be within the 7-bit ASCII character set.")
@@ -315,6 +318,143 @@ split_path :: proc(path: string) -> (dir, filename: string) {
return _split_path(path)
}
/*
Gets the file name and extension from a path.
e.g.
'path/to/name.tar.gz' -> 'name.tar.gz'
'path/to/name.txt' -> 'name.txt'
'path/to/name' -> 'name'
Returns "." if the path is an empty string.
*/
// Returns the file-name portion of `path` as reported by `split_path`.
// An empty `path` yields ".".
base :: proc(path: string) -> string {
	if len(path) == 0 {
		return "."
	}

	_, filename := split_path(path)
	return filename
}
/*
Gets the name of a file from a path.
The stem of a file is such that `stem(path)` + `ext(path)` = `base(path)`.
Only the last dot is considered when splitting the file extension.
See `short_stem`.
e.g.
'name.tar.gz' -> 'name.tar'
'name.txt' -> 'name'
Returns an empty string if there is no stem. e.g: '.gitignore'.
Returns an empty string if there's a trailing path separator.
*/
// Returns the file name of `path` with the last extension removed.
//
// Returns "" when `path` ends in a path separator, and when the file name
// itself begins with a dot (a dotfile such as '.gitignore').
stem :: proc(path: string) -> string {
	// A trailing separator means there is no file name to take a stem from.
	if len(path) > 0 && is_path_separator(path[len(path) - 1]) {
		return ""
	}

	// Reduce the path to its last element.
	name := path
	if i := strings.last_index_any(name, Path_Separator_Chars); i != -1 {
		name = name[i+1:]
	}

	// The dotfile test must look at the file name, not at the first byte of
	// the whole path; otherwise './name.txt' or '.config/name.txt' would
	// wrongly report an empty stem.
	if len(name) > 0 && name[0] == '.' {
		return ""
	}

	// Only the last dot separates the extension.
	if i := strings.last_index_byte(name, '.'); i != -1 {
		return name[:i]
	}
	return name
}
/*
Gets the name of a file from a path.
The short stem is such that `short_stem(path)` + `long_ext(path)` = `base(path)`,
where `long_ext` is the extension returned by `split_filename_all`.
The first dot is used to split off the file extension, unlike `stem` which uses the last dot.
e.g.
'name.tar.gz' -> 'name'
'name.txt' -> 'name'
Returns an empty string if there is no stem. e.g: '.gitignore'.
Returns an empty string if there's a trailing path separator.
*/
// Returns `stem(path)` cut at its first dot, or the whole stem when it
// contains no dot.
short_stem :: proc(path: string) -> string {
	full := stem(path)
	dot := strings.index_byte(full, '.')
	return full if dot == -1 else full[:dot]
}
/*
Gets the file extension from a path, including the dot.
The file extension is such that `stem(path)` + `ext(path)` = `base(path)`.
Only the last dot is considered when splitting the file extension.
See `long_ext`.
e.g.
'name.tar.gz' -> '.gz'
'name.txt' -> '.txt'
Returns an empty string if there is no dot.
Returns an empty string if there is a trailing path separator.
*/
// Scans `path` backwards and returns everything from the last dot onwards.
// The scan stops at the first path separator it meets, so a dot inside a
// directory name is never reported. Returns "" when no dot is found.
ext :: proc(path: string) -> string {
	idx := len(path) - 1
	for idx >= 0 {
		c := path[idx]
		if is_path_separator(c) {
			break
		}
		if c == '.' {
			return path[idx:]
		}
		idx -= 1
	}
	return ""
}
/*
Gets the file extension from a path, including the dot.
The long file extension is such that `short_stem(path)` + `long_ext(path)` = `base(path)`.
The first dot is used to split off the file extension, unlike `ext` which uses the last dot.
e.g.
'name.tar.gz' -> '.tar.gz'
'name.txt' -> '.txt'
Returns an empty string if there is no dot.
Returns an empty string if there is a trailing path separator.
*/
// Returns everything from the first dot of the file name onwards.
// Returns "" when `path` ends in a path separator or the file name has no dot.
long_ext :: proc(path: string) -> string {
	n := len(path)
	if n > 0 && is_path_separator(path[n-1]) {
		// Trailing separator: there is no file name.
		return ""
	}

	// Keep only what follows the last separator character.
	name := path
	if sep := strings.last_index_any(path, Path_Separator_Chars); sep >= 0 {
		name = path[sep+1:]
	}

	dot := strings.index_byte(name, '.')
	if dot < 0 {
		return ""
	}
	return name[dot:]
}
/*
Join all `elems` with the system's path separator and normalize the result.
@@ -460,3 +600,336 @@ split_path_list :: proc(path: string, allocator: runtime.Allocator) -> (list: []
return list, nil
}
/*
`match` states whether "name" matches the shell pattern
Pattern syntax is:
pattern:
{term}
term:
'*' matches any sequence of non-/ characters
'?' matches any single non-/ character
'[' ['^'] { character-range } ']'
character classification (cannot be empty)
c matches character c (c != '*', '?', '\\', '[')
'\\' c matches character c
character-range
c matches character c (c != '\\', '-', ']')
'\\' c matches character c
lo '-' hi matches character c for lo <= c <= hi
`match` requires that the pattern matches the entirety of the name, not just a substring.
The only possible error returned is `.Pattern_Syntax_Error`.
NOTE(bill): This is effectively the shell pattern matching system found
*/
// Reports whether `name` matches `pattern` in full.
// The pattern is consumed one chunk at a time: `scan_chunk` splits off any leading
// stars plus the literal/class text up to the next star, and `match_chunk` tries to
// match that text against the front of what is left of `name`.
// Errors come only from `match_chunk` and are propagated with `or_return`.
match :: proc(pattern, name: string) -> (matched: bool, err: Error) {
// Shadow the parameters so both can be advanced as they are consumed.
pattern, name := pattern, name
pattern_loop: for len(pattern) > 0 {
star: bool
chunk: string
star, chunk, pattern = scan_chunk(pattern)
if star && chunk == "" {
// Trailing star(s): matches the rest of the name unless it contains a separator.
return !strings.contains(name, _Path_Separator_String), nil
}
// First try to match the chunk right where we are.
t, ok := match_chunk(chunk, name) or_return
// Accept only if this consumed the whole name or more pattern remains;
// a final chunk that leaves part of the name over must fall through to the star search.
if ok && (len(t) == 0 || len(pattern) > 0) {
name = t
continue
}
if star {
// The star may absorb leading bytes: retry the chunk after skipping i+1 bytes.
// The loop condition stops at a separator, which a star is not allowed to cross.
for i := 0; i < len(name) && name[i] != _Path_Separator; i += 1 {
t, ok = match_chunk(chunk, name[i+1:]) or_return
if ok {
// For the last chunk, the name must be fully consumed; otherwise keep searching.
if len(pattern) == 0 && len(t) > 0 {
continue
}
name = t
continue pattern_loop
}
}
}
// Neither the direct attempt nor any star offset matched.
return false, nil
}
// Pattern exhausted: it is a match only if the name is exhausted too.
return len(name) == 0, nil
}
// glob returns the names of all files matching `pattern`.
// The syntax of patterns is the same as "match".
// The pattern may describe hierarchical names such as /usr/*/bin (assuming '/' is a separator)
//
// `allocator` is installed as `context.allocator` for the duration of the call.
//
// A pattern without any meta characters is returned as-is: the result is a one-element
// slice holding `pattern` itself, not a copy, and no file system check is made.
//
// When the directory part contains meta characters, `matches` stays nil if nothing matched.
//
// NOTE(review): the original statement that glob ignores file system errors holds only to
// the extent that `_glob` swallows them; any error `_glob` does return is passed on to the
// caller, together with whatever matches were collected before it occurred.
//
glob :: proc(pattern: string, allocator := context.allocator) -> (matches: []string, err: Error) {
context.allocator = allocator
if !has_meta(pattern) {
// TODO(bill): os.lstat on here to check for error
m := make([]string, 1)
m[0] = pattern
return m[:], nil
}
// Split into the directory part and the final element, which is matched by `_glob`.
dir, file := split_path(pattern)
volume_len: int
// Scratch storage handed to `_clean_glob_path`; on Windows the returned `dir` may point into it.
temp_buf: [8]byte
volume_len, dir = _clean_glob_path(dir, temp_buf[:])
// Directory part (past the volume prefix) is literal: a single directory scan is enough.
if !has_meta(dir[volume_len:]) {
m, e := _glob(dir, file, nil)
return m[:], e
}
// Directory part has meta characters: expand it first, then scan every expansion.
m := glob(dir) or_return
// The intermediate directory matches are only needed inside this call.
defer {
for s in m {
delete(s)
}
delete(m)
}
dmatches := make([dynamic]string, 0, 0)
for d in m {
// `_glob` appends to the array it is given and hands the (possibly grown) array back.
dmatches, err = _glob(d, file, &dmatches)
if err != nil {
break
}
}
if len(dmatches) > 0 {
matches = dmatches[:]
}
return
}
/*
Returns leading volume name.
e.g.
"C:\foo\bar\baz" will return "C:" on Windows.
Everything else will be "".
*/
// Returns the leading volume prefix of `path`.
// On Windows this is the first `_volume_name_len(path)` bytes; on every other
// target it is always the empty string.
volume_name :: proc(path: string) -> string {
	when ODIN_OS != .Windows {
		return ""
	} else {
		prefix_len := _volume_name_len(path)
		return path[:prefix_len]
	}
}
// Splits the next chunk off the front of `pattern`.
//
// Leading '*' characters are dropped and reported through `star`. `chunk` is the
// text up to (not including) the next '*' that is outside a '[...]' class, and
// `rest` is everything from that '*' onwards.
@(private="file")
scan_chunk :: proc(pattern: string) -> (star: bool, chunk, rest: string) {
	// Skip over the run of leading stars.
	lead := 0
	for lead < len(pattern) && pattern[lead] == '*' {
		lead += 1
	}
	star = lead > 0
	tail := pattern[lead:]

	inside_class := false
	end := 0
	for end < len(tail) {
		c := tail[end]
		if c == '*' && !inside_class {
			// An unbracketed star starts the next chunk.
			break
		}
		switch c {
		case '\\':
			// Outside Windows a backslash escapes the following byte, so step over it.
			when ODIN_OS != .Windows {
				if end+1 < len(tail) {
					end += 1
				}
			}
		case '[':
			inside_class = true
		case ']':
			inside_class = false
		}
		end += 1
	}

	chunk = tail[:end]
	rest  = tail[end:]
	return
}
// Matches `chunk` (a star-free piece of a pattern) against the front of `s`.
// On success returns the unconsumed remainder of `s` with `ok` set.
// On a mismatch returns with `ok` false and no error.
// A malformed chunk yields `.Pattern_Syntax_Error`, either from `get_escape` or from a
// dangling backslash.
@(private="file")
match_chunk :: proc(chunk, s: string) -> (rest: string, ok: bool, err: Error) {
// Shadow the parameters so both can be advanced as they are consumed.
chunk, s := chunk, s
for len(chunk) > 0 {
// Chunk text remains but the input is exhausted: no match.
if len(s) == 0 {
return
}
switch chunk[0] {
case '[':
// Character class: take one rune from the input and test it against every range.
r, w := utf8.decode_rune_in_string(s)
s = s[w:]
chunk = chunk[1:]
is_negated := false
if len(chunk) > 0 && chunk[0] == '^' {
is_negated = true
chunk = chunk[1:]
}
match := false
range_count := 0
for {
// ']' closes the class only after at least one range has been read,
// which is what makes an empty class a syntax error (reported by `get_escape`).
if len(chunk) > 0 && chunk[0] == ']' && range_count > 0 {
chunk = chunk[1:]
break
}
lo, hi: rune
if lo, chunk, err = get_escape(chunk); err != nil {
return
}
hi = lo
// `get_escape` reports an error when nothing follows the rune, so `chunk` is non-empty here.
if chunk[0] == '-' {
if hi, chunk, err = get_escape(chunk[1:]); err != nil {
return
}
}
if lo <= r && r <= hi {
match = true
}
range_count += 1
}
// Fail when the class result contradicts the negation flag.
if match == is_negated {
return
}
case '?':
// '?' consumes exactly one rune, but never a path separator.
if s[0] == _Path_Separator {
return
}
_, w := utf8.decode_rune_in_string(s)
s = s[w:]
chunk = chunk[1:]
case '\\':
// Outside Windows a backslash escapes the next byte; drop it and compare that byte literally.
when ODIN_OS != .Windows {
chunk = chunk[1:]
if len(chunk) == 0 {
err = .Pattern_Syntax_Error
return
}
}
fallthrough
case:
// Literal byte comparison.
if chunk[0] != s[0] {
return
}
s = s[1:]
chunk = chunk[1:]
}
}
return s, true, nil
}
// Reads one (possibly backslash-escaped) rune from the front of a character
// class body and returns it together with the text that follows.
//
// Reports `.Pattern_Syntax_Error` when `chunk` is empty, starts with '-' or ']',
// ends right after a backslash, holds an invalid UTF-8 byte, or has nothing
// following the rune.
@(private="file")
get_escape :: proc(chunk: string) -> (r: rune, next_chunk: string, err: Error) {
	if len(chunk) == 0 {
		return 0, "", .Pattern_Syntax_Error
	}
	switch chunk[0] {
	case '-', ']':
		return 0, "", .Pattern_Syntax_Error
	}

	body := chunk
	when ODIN_OS != .Windows {
		// A backslash escapes the character after it.
		if body[0] == '\\' {
			body = body[1:]
			if len(body) == 0 {
				return 0, "", .Pattern_Syntax_Error
			}
		}
	}

	width: int
	r, width = utf8.decode_rune_in_string(body)
	if r == utf8.RUNE_ERROR && width == 1 {
		err = .Pattern_Syntax_Error
	}

	// The rune and remainder are still filled in when an error is flagged.
	next_chunk = body[width:]
	if len(next_chunk) == 0 {
		err = .Pattern_Syntax_Error
	}
	return
}
// Internal implementation of `glob`, not meant to be used by the user. Prefer `glob`.
//
// Scans the single directory `dir` and appends the joined path of every entry whose
// name matches `pattern`. When `matches` is non-nil its current contents are the
// starting point; otherwise a fresh dynamic array is created. The resulting array is
// returned in `m`.
//
// File system errors are ignored, as documented for `glob`: if `dir` cannot be opened
// or inspected, or is not a directory, the matches collected so far are returned with
// no error. Only pattern errors from `match` and errors from `join_path` are reported.
_glob :: proc(dir, pattern: string, matches: ^[dynamic]string, allocator := context.allocator) -> (m: [dynamic]string, e: Error) {
	context.allocator = allocator

	if matches != nil {
		m = matches^
	} else {
		m = make([dynamic]string, 0, 0)
	}

	// An unreadable or vanished directory must not abort the whole glob.
	d, open_err := open(dir, O_RDONLY)
	if open_err != nil {
		return
	}
	defer close(d)

	file_info, stat_err := fstat(d, allocator)
	if stat_err != nil {
		return
	}
	defer file_info_delete(file_info, allocator)

	if file_info.type != .Directory {
		return
	}

	// A failed read is treated like an empty directory.
	fis, _ := read_dir(d, -1, allocator)
	defer file_info_slice_delete(fis, allocator)

	// Sort by name so results come back in a stable order.
	slice.sort_by(fis, proc(a, b: File_Info) -> bool {
		return a.name < b.name
	})

	for fi in fis {
		matched := match(pattern, fi.name) or_return
		if matched {
			matched_path := join_path({dir, fi.name}, allocator) or_return
			append(&m, matched_path)
		}
	}
	return
}
// Reports whether `path` contains any glob meta character.
// The set is `*`, `?` and `[`, plus the backslash on every target except Windows.
@(private)
has_meta :: proc(path: string) -> bool {
	META_CHARS :: `*?[` when ODIN_OS == .Windows else `*?[\`
	found := strings.contains_any(path, META_CHARS)
	return found
}
// Prepares the directory part of a glob pattern for scanning.
//
// Returns the cleaned directory and `prefix_len`, the number of leading bytes the
// caller skips before looking for meta characters (always 0 outside Windows).
//
// - ""                      -> "."
// - a bare root             -> unchanged
// - "C:" (Windows)          -> "C:.", built inside `temp_buf`
// - anything else           -> the path with one trailing separator removed, if it has one
//
// The trailing byte is removed only when it really is a separator, so a directory
// that arrives without one (e.g. "dir") is not truncated.
//
// `temp_buf` is only used on Windows; the string returned for the "C:" case points
// into it, so it must outlive the returned string.
@(private)
_clean_glob_path :: proc(path: string, temp_buf: []byte) -> (prefix_len: int, cleaned: string) {
	when ODIN_OS == .Windows {
		vol_len := _volume_name_len(path)
		switch {
		case path == "":
			return 0, "."
		case vol_len+1 == len(path) && is_path_separator(path[len(path)-1]): // /, \, C:\, C:/
			return vol_len+1, path
		case vol_len == len(path) && len(path) == 2: // C:
			copy(temp_buf[:], path)
			temp_buf[2] = '.'
			return vol_len, string(temp_buf[:3])
		}

		cleaned = path
		if is_path_separator(cleaned[len(cleaned)-1]) {
			cleaned = cleaned[:len(cleaned)-1]
		}
		// Keep the prefix length within the string that is actually returned.
		prefix_len = min(vol_len, len(cleaned))
		return
	} else {
		switch path {
		case "":
			return 0, "."
		case _Path_Separator_String:
			return 0, path
		}

		if is_path_separator(path[len(path)-1]) {
			return 0, path[:len(path)-1]
		}
		return 0, path
	}
}

View File

@@ -355,4 +355,4 @@ _split_path :: proc(path: string) -> (dir, file: string) {
return path[:i], path[i+1:]
}
return "", path
}
}

View File

@@ -3,14 +3,6 @@
package filepath
import os "core:os/os2"
import "core:slice"
import "core:strings"
import "core:unicode/utf8"
Match_Error :: enum {
None,
Syntax_Error,
}
// match states whether "name" matches the shell pattern
// Pattern syntax is:
@@ -34,183 +26,7 @@ Match_Error :: enum {
//
// NOTE(bill): This is effectively the shell pattern matching system found
//
match :: proc(pattern, name: string) -> (matched: bool, err: Match_Error) {
pattern, name := pattern, name
pattern_loop: for len(pattern) > 0 {
star: bool
chunk: string
star, chunk, pattern = scan_chunk(pattern)
if star && chunk == "" {
return !strings.contains(name, SEPARATOR_STRING), .None
}
t: string
ok: bool
t, ok, err = match_chunk(chunk, name)
if ok && (len(t) == 0 || len(pattern) > 0) {
name = t
continue
}
if err != .None {
return
}
if star {
for i := 0; i < len(name) && name[i] != SEPARATOR; i += 1 {
t, ok, err = match_chunk(chunk, name[i+1:])
if ok {
if len(pattern) == 0 && len(t) > 0 {
continue
}
name = t
continue pattern_loop
}
if err != .None {
return
}
}
}
return false, .None
}
return len(name) == 0, .None
}
@(private="file")
scan_chunk :: proc(pattern: string) -> (star: bool, chunk, rest: string) {
pattern := pattern
for len(pattern) > 0 && pattern[0] == '*' {
pattern = pattern[1:]
star = true
}
in_range, i := false, 0
scan_loop: for i = 0; i < len(pattern); i += 1 {
switch pattern[i] {
case '\\':
when ODIN_OS != .Windows {
if i+1 < len(pattern) {
i += 1
}
}
case '[':
in_range = true
case ']':
in_range = false
case '*':
in_range or_break scan_loop
}
}
return star, pattern[:i], pattern[i:]
}
@(private="file")
match_chunk :: proc(chunk, s: string) -> (rest: string, ok: bool, err: Match_Error) {
chunk, s := chunk, s
for len(chunk) > 0 {
if len(s) == 0 {
return
}
switch chunk[0] {
case '[':
r, w := utf8.decode_rune_in_string(s)
s = s[w:]
chunk = chunk[1:]
is_negated := false
if len(chunk) > 0 && chunk[0] == '^' {
is_negated = true
chunk = chunk[1:]
}
match := false
range_count := 0
for {
if len(chunk) > 0 && chunk[0] == ']' && range_count > 0 {
chunk = chunk[1:]
break
}
lo, hi: rune
if lo, chunk, err = get_escape(chunk); err != .None {
return
}
hi = lo
if chunk[0] == '-' {
if hi, chunk, err = get_escape(chunk[1:]); err != .None {
return
}
}
if lo <= r && r <= hi {
match = true
}
range_count += 1
}
if match == is_negated {
return
}
case '?':
if s[0] == SEPARATOR {
return
}
_, w := utf8.decode_rune_in_string(s)
s = s[w:]
chunk = chunk[1:]
case '\\':
when ODIN_OS != .Windows {
chunk = chunk[1:]
if len(chunk) == 0 {
err = .Syntax_Error
return
}
}
fallthrough
case:
if chunk[0] != s[0] {
return
}
s = s[1:]
chunk = chunk[1:]
}
}
return s, true, .None
}
@(private="file")
get_escape :: proc(chunk: string) -> (r: rune, next_chunk: string, err: Match_Error) {
if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' {
err = .Syntax_Error
return
}
chunk := chunk
if chunk[0] == '\\' && ODIN_OS != .Windows {
chunk = chunk[1:]
if len(chunk) == 0 {
err = .Syntax_Error
return
}
}
w: int
r, w = utf8.decode_rune_in_string(chunk)
if r == utf8.RUNE_ERROR && w == 1 {
err = .Syntax_Error
}
next_chunk = chunk[w:]
if len(next_chunk) == 0 {
err = .Syntax_Error
}
return
}
match :: os.match
// glob returns the names of all files matching pattern or nil if there are no matching files
// The syntax of patterns is the same as "match".
@@ -218,140 +34,4 @@ get_escape :: proc(chunk: string) -> (r: rune, next_chunk: string, err: Match_Er
//
// glob ignores file system errors
//
glob :: proc(pattern: string, allocator := context.allocator) -> (matches: []string, err: Match_Error) {
context.allocator = allocator
if !has_meta(pattern) {
// TODO(bill): os.lstat on here to check for error
m := make([]string, 1)
m[0] = pattern
return m[:], .None
}
dir, file := split(pattern)
volume_len := 0
when ODIN_OS == .Windows {
temp_buf: [8]byte
volume_len, dir = clean_glob_path_windows(dir, temp_buf[:])
} else {
dir = clean_glob_path(dir)
}
if !has_meta(dir[volume_len:]) {
m, e := _glob(dir, file, nil)
return m[:], e
}
m: []string
m, err = glob(dir)
if err != .None {
return
}
defer {
for s in m {
delete(s)
}
delete(m)
}
dmatches := make([dynamic]string, 0, 0)
for d in m {
dmatches, err = _glob(d, file, &dmatches)
if err != .None {
break
}
}
if len(dmatches) > 0 {
matches = dmatches[:]
}
return
}
// Internal implementation of `glob`, not meant to be used by the user. Prefer `glob`.
_glob :: proc(dir, pattern: string, matches: ^[dynamic]string, allocator := context.allocator) -> (m: [dynamic]string, e: Match_Error) {
context.allocator = allocator
if matches != nil {
m = matches^
} else {
m = make([dynamic]string, 0, 0)
}
d, derr := os.open(dir, os.O_RDONLY)
if derr != nil {
return
}
defer os.close(d)
{
file_info, ferr := os.fstat(d, allocator)
defer os.file_info_delete(file_info, allocator)
if ferr != nil {
return
}
if file_info.type != .Directory {
return
}
}
fis, _ := os.read_dir(d, -1, allocator)
slice.sort_by(fis, proc(a, b: os.File_Info) -> bool {
return a.name < b.name
})
defer os.file_info_slice_delete(fis, allocator)
for fi in fis {
n := fi.name
matched := match(pattern, n) or_return
if matched {
append(&m, join({dir, n}))
}
}
return
}
@(private)
has_meta :: proc(path: string) -> bool {
when ODIN_OS == .Windows {
CHARS :: `*?[`
} else {
CHARS :: `*?[\`
}
return strings.contains_any(path, CHARS)
}
@(private)
clean_glob_path :: proc(path: string) -> string {
switch path {
case "":
return "."
case SEPARATOR_STRING:
return path
}
return path[:len(path)-1]
}
@(private)
clean_glob_path_windows :: proc(path: string, temp_buf: []byte) -> (prefix_len: int, cleaned: string) {
vol_len := volume_name_len(path)
switch {
case path == "":
return 0, "."
case vol_len+1 == len(path) && is_separator(path[len(path)-1]): // /, \, C:\, C:/
return vol_len+1, path
case vol_len == len(path) && len(path) == 2: // C:
copy(temp_buf[:], path)
temp_buf[2] = '.'
return vol_len, string(temp_buf[:3])
}
if vol_len >= len(path) {
vol_len = len(path) -1
}
return vol_len, path[:len(path)-1]
}
glob :: os.glob

View File

@@ -2,19 +2,14 @@
// To process paths such as URLs that depend on forward slashes regardless of the OS, use the slashpath package.
package filepath
import "base:runtime"
import "core:strings"
import "base:runtime"
import os "core:os/os2"
import "core:strings"
SEPARATOR_CHARS :: `/\`
// is_separator checks whether the byte is a valid separator character
is_separator :: proc(c: byte) -> bool {
switch c {
case '/': return true
case '\\': return ODIN_OS == .Windows
}
return false
}
is_separator :: os.is_path_separator
@(private)
is_slash :: proc(c: byte) -> bool {
@@ -23,14 +18,7 @@ is_slash :: proc(c: byte) -> bool {
// Splits path immediate following the last separator; separating the path into a directory and file.
// If no separator is found, `dir` will be empty and `path` set to `path`.
split :: proc(path: string) -> (dir, file: string) {
vol := volume_name(path)
i := len(path) - 1
for i >= len(vol) && !is_separator(path[i]) {
i -= 1
}
return path[:i+1], path[i+1:]
}
split :: os.split_path
/*
Returns leading volume name.
@@ -123,30 +111,7 @@ volume_name_len :: proc(path: string) -> int {
Returns "." if the path is an empty string.
*/
base :: proc(path: string) -> string {
if path == "" {
return "."
}
path := path
for len(path) > 0 && is_separator(path[len(path)-1]) {
path = path[:len(path)-1]
}
path = path[volume_name_len(path):]
i := len(path)-1
for i >= 0 && !is_separator(path[i]) {
i -= 1
}
if i >= 0 {
path = path[i+1:]
}
if path == "" {
return SEPARATOR_STRING
}
return path
}
base :: os.base
/*
Gets the name of a file from a path.
@@ -163,24 +128,7 @@ base :: proc(path: string) -> string {
Returns an empty string if there is no stem. e.g: '.gitignore'.
Returns an empty string if there's a trailing path separator.
*/
stem :: proc(path: string) -> string {
if len(path) > 0 && is_separator(path[len(path) - 1]) {
// NOTE(tetra): Trailing separator
return ""
}
// NOTE(tetra): Get the basename
path := path
if i := strings.last_index_any(path, SEPARATOR_CHARS); i != -1 {
path = path[i+1:]
}
if i := strings.last_index_byte(path, '.'); i != -1 {
return path[:i]
}
return path
}
stem :: os.stem
/*
Gets the name of a file from a path.
@@ -196,13 +144,7 @@ stem :: proc(path: string) -> string {
Returns an empty string if there is no stem. e.g: '.gitignore'.
Returns an empty string if there's a trailing path separator.
*/
short_stem :: proc(path: string) -> string {
s := stem(path)
if i := strings.index_byte(s, '.'); i != -1 {
return s[:i]
}
return s
}
short_stem :: os.short_stem
/*
Gets the file extension from a path, including the dot.
@@ -219,14 +161,7 @@ short_stem :: proc(path: string) -> string {
Returns an empty string if there is no dot.
Returns an empty string if there is a trailing path separator.
*/
ext :: proc(path: string) -> string {
for i := len(path)-1; i >= 0 && !is_separator(path[i]); i -= 1 {
if path[i] == '.' {
return path[i:]
}
}
return ""
}
ext :: os.ext
/*
Gets the file extension from a path, including the dot.
@@ -242,24 +177,7 @@ ext :: proc(path: string) -> string {
Returns an empty string if there is no dot.
Returns an empty string if there is a trailing path separator.
*/
long_ext :: proc(path: string) -> string {
if len(path) > 0 && is_separator(path[len(path) - 1]) {
// NOTE(tetra): Trailing separator
return ""
}
// NOTE(tetra): Get the basename
path := path
if i := strings.last_index_any(path, SEPARATOR_CHARS); i != -1 {
path = path[i+1:]
}
if i := strings.index_byte(path, '.'); i != -1 {
return path[i:]
}
return ""
}
long_ext :: os.long_ext
/*
Returns the shortest path name equivalent to `path` through solely lexical processing.
@@ -591,4 +509,4 @@ lazy_buffer_destroy :: proc(lb: ^Lazy_Buffer) -> runtime.Allocator_Error {
err := delete(lb.b)
lb^ = {}
return err
}
}

View File

@@ -41,7 +41,7 @@ abs :: proc(path: string, allocator := context.allocator) -> (string, bool) {
if err != nil {
return "", false
}
p := clean(full_path, allocator)
p, _ := clean(full_path, allocator)
return p, true
}
@@ -68,7 +68,8 @@ join_non_empty :: proc(elems: []string, allocator := context.allocator) -> (join
}
s := strings.join(elems[i:], SEPARATOR_STRING, context.temp_allocator) or_return
s = strings.concatenate({elems[0], s}, context.temp_allocator) or_return
return clean(s)
s, _ = clean(s)
return
}
p := strings.join(elems, SEPARATOR_STRING, context.temp_allocator) or_return