From abe0c308371456b603c828ba09e2394a6d9171f2 Mon Sep 17 00:00:00 2001
From: Feoramund <161657516+Feoramund@users.noreply.github.com>
Date: Thu, 20 Mar 2025 17:39:16 -0400
Subject: [PATCH] Add new path API for `os2`

---
 core/os/os2/path.odin         | 369 ++++++++++++++++++++++++++++++++++
 core/os/os2/path_posixfs.odin |  78 +++++++
 core/os/os2/path_windows.odin |  96 ++++++++-
 core/os/os2/stat_windows.odin |  18 --
 4 files changed, 541 insertions(+), 20 deletions(-)
 create mode 100644 core/os/os2/path_posixfs.odin

diff --git a/core/os/os2/path.odin b/core/os/os2/path.odin
index 3dedc2961..fb2008f37 100644
--- a/core/os/os2/path.odin
+++ b/core/os/os2/path.odin
@@ -1,8 +1,10 @@
 package os2
 
+import "base:intrinsics"
 import "base:runtime"
 
 import "core:path/filepath"
+import "core:strings"
 
 Path_Separator        :: _Path_Separator        // OS-Specific
 Path_Separator_String :: _Path_Separator_String // OS-Specific
@@ -92,3 +94,370 @@ get_executable_directory :: proc(allocator: runtime.Allocator) -> (path: string,
 	path, _ = filepath.split(path)
 	return
 }
+
+/*
+Compare two paths for exactness without normalization.
+
+This procedure takes into account case-sensitivity on differing systems.
+*/
+@(require_results)
+are_paths_identical :: proc(a, b: string) -> (identical: bool) {
+	return _are_paths_identical(a, b)
+}
+
+/*
+Normalize a path.
+
+*Allocates Using Provided Allocator*
+
+This will remove duplicate separators and unneeded references to the current or
+parent directory.
+*/
+@(require_results)
+clean_path :: proc(path: string, allocator: runtime.Allocator) -> (cleaned: string, err: Error) {
+	if path == "" || path == "." {
+		return strings.clone(".", allocator)
+	}
+
+	TEMP_ALLOCATOR_GUARD()
+
+	// The extra byte simplifies appending path elements by letting the loop
+	// end every element with a separator. The last one is trimmed when we're done.
+	buffer := make([]u8, len(path) + 1, temp_allocator()) or_return
+
+	// This is the only point where Windows and POSIX differ, as Windows has
+	// alphabet-based volumes for root paths.
+	rooted, start := _clean_path_handle_start(path, buffer)
+
+	head, buffer_i := start, start
+	for i, j := start, start; i <= len(path); i += 1 {
+		if i == len(path) || _is_path_separator(path[i]) {
+			elem := path[j:i]
+			j = i + 1
+
+			switch elem {
+			case "", ".":
+				// Skip duplicate path separators and current directory references.
+			case "..":
+				if !rooted && buffer_i == head {
+					// Only allow accessing further parent directories when the path is relative.
+					buffer[buffer_i] = '.'
+					buffer[buffer_i+1] = '.'
+					buffer[buffer_i+2] = _Path_Separator
+					buffer_i += 3
+					head = buffer_i
+				} else {
+					// Roll back to the last separator or the head of the buffer.
+					back_to := head
+					// `buffer_i` is one past the last byte written, and that
+					// byte is the separator ending the previous element, so
+					// the search starts two bytes back to step over it.
+					for k := buffer_i-2; k >= head; k -= 1 {
+						if _is_path_separator(buffer[k]) {
+							back_to = k + 1
+							break
+						}
+					}
+					buffer_i = back_to
+				}
+			case:
+				// Copy the path element verbatim and add a separator.
+				intrinsics.mem_copy_non_overlapping(raw_data(buffer[buffer_i:]), raw_data(elem), len(elem))
+				buffer_i += len(elem)
+				buffer[buffer_i] = _Path_Separator
+				buffer_i += 1
+			}
+		}
+	}
+
+	// Trim the final separator.
+	// NOTE: No need to check if the last byte is a separator, as we always add it.
+	if buffer_i > start {
+		buffer_i -= 1
+	}
+
+	if buffer_i == 0 {
+		return strings.clone(".", allocator)
+	}
+
+	compact := make([]u8, buffer_i, allocator) or_return
+	intrinsics.mem_copy_non_overlapping(raw_data(compact), raw_data(buffer), buffer_i)
+	return string(compact), nil
+}
+
+/*
+Return true if `path` is an absolute path as opposed to a relative one.
+*/
+@(require_results)
+is_absolute_path :: proc(path: string) -> bool {
+	return _is_absolute_path(path)
+}
+
+/*
+Get the absolute path to `path` with respect to the process's current directory.
+
+*Allocates Using Provided Allocator*
+*/
+@(require_results)
+get_absolute_path :: proc(path: string, allocator: runtime.Allocator) -> (absolute_path: string, err: Error) {
+	return _get_absolute_path(path, allocator)
+}
+
+/*
+Get the relative path needed to change directories from `base` to `target`.
+
+*Allocates Using Provided Allocator*
+
+The result is such that `join_path({base, get_relative_path(base, target)})` is equivalent to `target`.
+
+NOTE: This procedure expects both `base` and `target` to be normalized first,
+which can be done by calling `clean_path` on them if needed.
+
+This procedure will return an `Invalid_Path` error if `base` begins with a
+reference to the parent directory (`".."`). Use `get_working_directory` with
+`join_path` to construct absolute paths for both arguments instead.
+*/
+@(require_results)
+get_relative_path :: proc(base, target: string, allocator: runtime.Allocator) -> (path: string, err: Error) {
+	if _are_paths_identical(base, target) {
+		return strings.clone(".", allocator)
+	}
+	if base == "." {
+		return strings.clone(target, allocator)
+	}
+
+	// This is the first point where Windows and POSIX differ, as Windows has
+	// alphabet-based volumes for root paths.
+	if !_get_relative_path_handle_start(base, target) {
+		return "", .Invalid_Path
+	}
+	if strings.has_prefix(base, "..") && (len(base) == 2 || _is_path_separator(base[2])) {
+		// We could do the work for the user of getting absolute paths for both
+		// arguments, but that could make something costly (repeatedly
+		// normalizing paths) convenient, when it would be better for the user
+		// to store already-finalized paths and operate on those instead.
+		return "", .Invalid_Path
+	}
+
+	// This is the other point where Windows and POSIX differ, as Windows is
+	// case-insensitive.
+	common := _get_common_path_len(base, target)
+
+	// Get the result of splitting `base` and `target` on _Path_Separator,
+	// comparing them up to their most common elements, then count how many
+	// unshared parts are in the split `base`.
+	seps := 0
+	size := 0
+	if len(base)-common > 0 {
+		seps = 1
+		size = 2
+	}
+	// This range skips separators on the ends of the string.
+	for i in common+1..<len(base)-1 {
+		if _is_path_separator(base[i]) {
+			seps += 1
+			size += 3
+		}
+	}
+
+	// Handle the rest of the size calculations.
+	trailing := target[common:]
+	if len(trailing) > 0 {
+		// Account for leading separators on the target after cutting the common part.
+		// (i.e. base == `/home`, target == `/home/a`)
+		if _is_path_separator(trailing[0]) {
+			trailing = trailing[1:]
+		}
+		size += len(trailing)
+		if seps > 0 {
+			size += 1
+		}
+	}
+	if trailing == "." {
+		trailing = ""
+		size -= 2
+	}
+
+	// Build the string.
+	buf := make([]u8, size, allocator) or_return
+	n := 0
+	if seps > 0 {
+		buf[0] = '.'
+		buf[1] = '.'
+		n = 2
+	}
+	for _ in 1..<seps {
+		buf[n] = _Path_Separator
+		buf[n+1] = '.'
+		buf[n+2] = '.'
+		n += 3
+	}
+	if len(trailing) > 0 {
+		if seps > 0 {
+			buf[n] = _Path_Separator
+			n += 1
+		}
+		runtime.mem_copy_non_overlapping(raw_data(buf[n:]), raw_data(trailing), len(trailing))
+	}
+
+	path = string(buf)
+
+	return
+}
+
+/*
+Split a path into a directory hierarchy and a filename.
+
+For example, `split_path("/home/foo/bar.tar.gz")` will return `"/home/foo"` and `"bar.tar.gz"`.
+*/
+@(require_results)
+split_path :: proc(path: string) -> (dir, filename: string) {
+	return _split_path(path)
+}
+
+/*
+Join all `elems` with the system's path separator and normalize the result.
+
+*Allocates Using Provided Allocator*
+
+For example, `join_path({"/home", "foo", "bar.txt"})` will result in `"/home/foo/bar.txt"`.
+*/
+@(require_results)
+join_path :: proc(elems: []string, allocator: runtime.Allocator) -> (joined: string, err: Error) {
+	for e, i in elems {
+		if e != "" {
+			TEMP_ALLOCATOR_GUARD()
+			p := strings.join(elems[i:], Path_Separator_String, temp_allocator()) or_return
+			return clean_path(p, allocator)
+		}
+	}
+	return "", nil
+}
+
+/*
+Split a filename from its extension.
+
+This procedure splits on the last extension separator (`.`).
+
+If the filename begins with a `.`, such as `".readme.txt"`, that leading `.`
+is kept as part of the filename, resulting in `".readme"` and `"txt"`.
+
+For example, `split_filename("foo.tar.gz")` will return `"foo.tar"` and `"gz"`.
+*/
+@(require_results)
+split_filename :: proc(filename: string) -> (base, ext: string) {
+	i := strings.last_index_byte(filename, '.')
+	if i <= 0 {
+		return filename, ""
+	}
+	return filename[:i], filename[i+1:]
+}
+
+/*
+Split a filename from its extension.
+
+This procedure splits on the first extension separator (`.`) after the first byte.
+
+If the filename begins with a `.`, such as `".readme.txt.gz"`, that leading `.`
+is kept as part of the filename, resulting in `".readme"` and `"txt.gz"`.
+
+For example, `split_filename_all("foo.tar.gz")` will return `"foo"` and `"tar.gz"`.
+*/
+@(require_results)
+split_filename_all :: proc(filename: string) -> (base, ext: string) {
+	i := strings.index_byte(filename, '.')
+	if i == 0 {
+		j := strings.index_byte(filename[1:], '.')
+		if j != -1 {
+			j += 1
+		}
+		i = j
+	}
+	if i == -1 {
+		return filename, ""
+	}
+	return filename[:i], filename[i+1:]
+}
+
+/*
+Join `base` and `ext` with the system's filename extension separator.
+
+*Allocates Using Provided Allocator*
+
+For example, `join_filename("foo", "tar.gz")` will result in `"foo.tar.gz"`.
+*/
+@(require_results)
+join_filename :: proc(base: string, ext: string, allocator: runtime.Allocator) -> (joined: string, err: Error) {
+	len_base := len(base)
+	if len_base == 0 {
+		return strings.clone(ext, allocator)
+	} else if len(ext) == 0 {
+		return strings.clone(base, allocator)
+	}
+
+	buf := make([]u8, len_base + 1 + len(ext), allocator) or_return
+	intrinsics.mem_copy_non_overlapping(raw_data(buf), raw_data(base), len_base)
+	buf[len_base] = '.'
+	intrinsics.mem_copy_non_overlapping(raw_data(buf[1+len_base:]), raw_data(ext), len(ext))
+
+	return string(buf), nil
+}
+
+/*
+Split a list of paths joined by the system-specific `Path_List_Separator`, as is
+typical of environment variables that name multiple directories (e.g. "PATH").
+
+*Allocates Using Provided Allocator*
+
+A separator inside double quotes does not split the list, and all double quotes
+are removed from the returned entries. An empty `path` yields a nil list.
+*/
+@(require_results)
+split_path_list :: proc(path: string, allocator: runtime.Allocator) -> (list: []string, err: Error) {
+	if path == "" {
+		return nil, nil
+	}
+
+	start: int
+	quote: bool
+
+	start, quote = 0, false
+	count := 0
+
+	for i := 0; i < len(path); i += 1 {
+		c := path[i]
+		switch {
+		case c == '"':
+			quote = !quote
+		case c == Path_List_Separator && !quote:
+			count += 1
+		}
+	}
+
+	start, quote = 0, false
+	list = make([]string, count + 1, allocator) or_return
+	index := 0
+	for i := 0; i < len(path); i += 1 {
+		c := path[i]
+		switch {
+		case c == '"':
+			quote = !quote
+		case c == Path_List_Separator && !quote:
+			list[index] = path[start:i]
+			index += 1
+			start = i + 1
+		}
+	}
+	assert(index == count)
+	list[index] = path[start:]
+
+	for s0, i in list {
+		s, new := strings.replace_all(s0, `"`, ``, allocator)
+		if !new {
+			s = strings.clone(s, allocator) or_return
+		}
+		list[i] = s
+	}
+
+	return list, nil
+}
diff --git a/core/os/os2/path_posixfs.odin b/core/os/os2/path_posixfs.odin
new file mode 100644
index 000000000..8f9d43d63
--- /dev/null
+++ 
b/core/os/os2/path_posixfs.odin
@@ -0,0 +1,99 @@
+#+private
+#+build linux, darwin, netbsd, freebsd, openbsd, wasi
+package os2
+
+// This implementation is for all systems that have POSIX-compliant filesystem paths.
+
+import "base:runtime"
+import "core:strings"
+import "core:sys/posix"
+
+// Paths are compared byte-for-byte here, so the comparison is case-sensitive.
+_are_paths_identical :: proc(a, b: string) -> (identical: bool) {
+	return a == b
+}
+
+// Write the root separator into `buffer` when `path` is rooted and report where
+// the first path element starts. `path` must not be empty.
+_clean_path_handle_start :: proc(path: string, buffer: []u8) -> (rooted: bool, start: int) {
+	// Preserve rooted paths.
+	if _is_path_separator(path[0]) {
+		rooted = true
+		buffer[0] = _Path_Separator
+		start = 1
+	}
+	return
+}
+
+// A path is absolute when it begins with a path separator.
+_is_absolute_path :: proc(path: string) -> bool {
+	return len(path) > 0 && _is_path_separator(path[0])
+}
+
+// Resolve `path` with `realpath`, treating an empty path as the current
+// directory. The result is cloned into `allocator`.
+_get_absolute_path :: proc(path: string, allocator: runtime.Allocator) -> (absolute_path: string, err: Error) {
+	rel := path
+	if rel == "" {
+		rel = "."
+	}
+	TEMP_ALLOCATOR_GUARD()
+	// Propagate an allocation failure instead of handing a nil string to `realpath`.
+	rel_cstr := strings.clone_to_cstring(rel, temp_allocator()) or_return
+	path_ptr := posix.realpath(rel_cstr, nil)
+	if path_ptr == nil {
+		return "", Platform_Error(posix.errno())
+	}
+	defer posix.free(path_ptr)
+
+	// A failed clone must be reported rather than returned as an empty path with no error.
+	path_str := strings.clone(string(path_ptr), allocator) or_return
+	return path_str, nil
+}
+
+// Report whether `base` and `target` are both rooted or both relative.
+_get_relative_path_handle_start :: proc(base, target: string) -> bool {
+	base_rooted   := len(base)   > 0 && _is_path_separator(base[0])
+	target_rooted := len(target) > 0 && _is_path_separator(target[0])
+	return base_rooted == target_rooted
+}
+
+// Return the length of the longest shared prefix of `base` and `target` that is
+// made of whole path elements.
+_get_common_path_len :: proc(base, target: string) -> int {
+	// An element boundary is the end of the path, a separator, or the byte
+	// right after a separator (which keeps a root like `/` a whole element).
+	at_boundary :: proc(path: string, j: int) -> bool {
+		return j == len(path) || _is_path_separator(path[j]) || (j > 0 && _is_path_separator(path[j-1]))
+	}
+
+	i := 0
+	end := min(len(base), len(target))
+	for j in 0..=end {
+		if j == end || _is_path_separator(base[j]) {
+			// Both paths must have a boundary here; comparing bytes alone
+			// would treat `/home/foo` as a parent of `/home/foobar`.
+			if at_boundary(base, j) && at_boundary(target, j) && base[i:j] == target[i:j] {
+				i = j
+			} else {
+				break
+			}
+		}
+	}
+	return i
+}
+
+// Split `path` at its last separator. A separator at index 0 is the root and is
+// kept as the directory.
+_split_path :: proc(path: string) -> (dir, file: string) {
+	i := len(path) - 1
+	for i >= 0 && !_is_path_separator(path[i]) {
+		i -= 1
+	}
+	if i == 0 {
+		return path[:i+1], path[i+1:]
+	} else if i > 0 {
+		return path[:i], path[i+1:]
+	}
+	return "", path
+}
diff --git 
a/core/os/os2/path_windows.odin b/core/os/os2/path_windows.odin
index 041a4d1e3..c8264cc2d 100644
--- a/core/os/os2/path_windows.odin
+++ b/core/os/os2/path_windows.odin
@@ -1,8 +1,10 @@
 #+private
 package os2
 
-import win32 "core:sys/windows"
+import "base:intrinsics"
 import "base:runtime"
+import "core:strings"
+import win32 "core:sys/windows"
 
 _Path_Separator        :: '\\'
 _Path_Separator_String :: "\\"
@@ -217,7 +219,7 @@ _fix_long_path_internal :: proc(path: string) -> string {
 		return path
 	}
 
-	if !_is_abs(path) { // relative path
+	if !_is_absolute_path(path) { // relative path
 		return path
 	}
 
@@ -257,3 +259,114 @@ _fix_long_path_internal :: proc(path: string) -> string {
 
 	return string(path_buf[:w])
 }
+
+// Windows paths are compared without regard to case.
+_are_paths_identical :: strings.equal_fold
+
+// Copy the volume name of `path` (and the separator following it, if any) into
+// `buffer` and report where the first path element starts.
+_clean_path_handle_start :: proc(path: string, buffer: []u8) -> (rooted: bool, start: int) {
+	// Preserve rooted paths.
+	start = _volume_name_len(path)
+	if start > 0 {
+		rooted = true
+		if len(path) > start && _is_path_separator(path[start]) {
+			// Take `C:` to `C:\`.
+			start += 1
+		}
+		intrinsics.mem_copy_non_overlapping(raw_data(buffer), raw_data(path), start)
+	}
+	return
+}
+
+// A path is absolute when it is a reserved name or when its volume name is
+// followed by a path separator.
+_is_absolute_path :: proc(path: string) -> bool {
+	if _is_reserved_name(path) {
+		return true
+	}
+	l := _volume_name_len(path)
+	if l == 0 {
+		return false
+	}
+
+	path := path
+	path = path[l:]
+	if path == "" {
+		return false
+	}
+	return _is_path_separator(path[0])
+}
+
+// Resolve `path` with `GetFullPathNameW`, treating an empty path as the current
+// directory. The result is converted to UTF-8 using `allocator`.
+_get_absolute_path :: proc(path: string, allocator: runtime.Allocator) -> (absolute_path: string, err: Error) {
+	rel := path
+	if rel == "" {
+		rel = "."
+	}
+	TEMP_ALLOCATOR_GUARD()
+	rel_utf16 := win32.utf8_to_utf16(rel, temp_allocator())
+	// The first call passes no buffer and only asks for the required length.
+	n := win32.GetFullPathNameW(raw_data(rel_utf16), 0, nil, nil)
+	if n == 0 {
+		return "", Platform_Error(win32.GetLastError())
+	}
+
+	buf := make([]u16, n, temp_allocator()) or_return
+	n = win32.GetFullPathNameW(raw_data(rel_utf16), u32(n), raw_data(buf), nil)
+	if n == 0 {
+		return "", Platform_Error(win32.GetLastError())
+	}
+
+	return win32.utf16_to_utf8(buf, allocator)
+}
+
+// Report whether `base` and `target` share the same volume name, ignoring case.
+_get_relative_path_handle_start :: proc(base, target: string) -> bool {
+	base_root   := base[:_volume_name_len(base)]
+	target_root := target[:_volume_name_len(target)]
+	return strings.equal_fold(base_root, target_root)
+}
+
+// Return the length of the longest shared prefix of `base` and `target` that is
+// made of whole path elements, ignoring case.
+_get_common_path_len :: proc(base, target: string) -> int {
+	// An element boundary is the end of the path, a separator, or the byte
+	// right after a separator (which keeps a root like `C:\` a whole element).
+	at_boundary :: proc(path: string, j: int) -> bool {
+		return j == len(path) || _is_path_separator(path[j]) || (j > 0 && _is_path_separator(path[j-1]))
+	}
+
+	i := 0
+	end := min(len(base), len(target))
+	for j in 0..=end {
+		if j == end || _is_path_separator(base[j]) {
+			// Both paths must have a boundary here; comparing bytes alone
+			// would treat `C:\foo` as a parent of `C:\foobar`.
+			if at_boundary(base, j) && at_boundary(target, j) && strings.equal_fold(base[i:j], target[i:j]) {
+				i = j
+			} else {
+				break
+			}
+		}
+	}
+	return i
+}
+
+// Split `path` at its last separator after the volume name. A separator directly
+// after the volume name is kept as part of the directory.
+_split_path :: proc(path: string) -> (dir, file: string) {
+	vol_len := _volume_name_len(path)
+
+	i := len(path) - 1
+	for i >= vol_len && !_is_path_separator(path[i]) {
+		i -= 1
+	}
+	if i == vol_len {
+		return path[:i+1], path[i+1:]
+	} else if i > vol_len {
+		return path[:i], path[i+1:]
+	}
+	return "", path
+}
diff --git a/core/os/os2/stat_windows.odin b/core/os/os2/stat_windows.odin
index 31f5d9e88..14744c90f 100644
--- a/core/os/os2/stat_windows.odin
+++ b/core/os/os2/stat_windows.odin
@@ -351,21 +351,3 @@ _volume_name_len :: proc(path: string) -> int {
 	}
 	return 0
 }
-
-_is_abs :: proc(path: string) -> bool {
-	if _is_reserved_name(path) {
-		return true
-	}
-	l := _volume_name_len(path)
-	if l == 0 {
-		return false
-	}
-
-	path := path
-	path = path[l:]
-	if path == "" {
-		return false
-	}
-	return is_path_separator(path[0])
-}
-