diff --git a/changelog.md b/changelog.md index 41be288802..e48f72023c 100644 --- a/changelog.md +++ b/changelog.md @@ -90,6 +90,8 @@ proc enumToString*(enums: openArray[enum]): string = - There is a new stdlib module `std/diff` to compute the famous "diff" of two texts by line. +- Added `os.relativePath`. + ### Library changes - The string output of `macros.lispRepr` proc has been tweaked diff --git a/compiler/pathutils.nim b/compiler/pathutils.nim index 703467bc4d..80c479898c 100644 --- a/compiler/pathutils.nim +++ b/compiler/pathutils.nim @@ -9,9 +9,8 @@ ## Path handling utilities for Nim. Strictly typed code in order ## to avoid the never ending time sink in getting path handling right. -## Might be a candidate for the stdlib later. -import os, strutils +import os, strutils, pathnorm type AbsoluteFile* = distinct string @@ -45,130 +44,9 @@ proc cmpPaths*(x, y: AbsoluteDir): int {.borrow.} proc createDir*(x: AbsoluteDir) {.borrow.} -type - PathIter = object - i, prev: int - notFirst: bool - -proc hasNext(it: PathIter; x: string): bool = - it.i < x.len - -proc next(it: var PathIter; x: string): (int, int) = - it.prev = it.i - if not it.notFirst and x[it.i] in {DirSep, AltSep}: - # absolute path: - inc it.i - else: - while it.i < x.len and x[it.i] notin {DirSep, AltSep}: inc it.i - if it.i > it.prev: - result = (it.prev, it.i-1) - elif hasNext(it, x): - result = next(it, x) - - # skip all separators: - while it.i < x.len and x[it.i] in {DirSep, AltSep}: inc it.i - it.notFirst = true - -iterator dirs(x: string): (int, int) = - var it: PathIter - while hasNext(it, x): yield next(it, x) - -proc isDot(x: string; bounds: (int, int)): bool = - bounds[1] == bounds[0] and x[bounds[0]] == '.' - -proc isDotDot(x: string; bounds: (int, int)): bool = - bounds[1] == bounds[0] + 1 and x[bounds[0]] == '.' and x[bounds[0]+1] == '.' 
- -proc isSlash(x: string; bounds: (int, int)): bool = - bounds[1] == bounds[0] and x[bounds[0]] in {DirSep, AltSep} - -const canonDirSep = when isMainModule: '/' else: DirSep - -proc canon(x: string; result: var string; state: var int) = - # state: 0th bit set if isAbsolute path. Other bits count - # the number of path components. - for b in dirs(x): - if (state shr 1 == 0) and isSlash(x, b): - result.add canonDirSep - state = state or 1 - elif result.len > (state and 1) and isDotDot(x, b): - var d = result.len - # f/.. - while (d-1) > (state and 1) and result[d-1] notin {DirSep, AltSep}: - dec d - if d > 0: setLen(result, d-1) - elif isDot(x, b): - discard "discard the dot" - elif b[1] >= b[0]: - if result.len > 0 and result[^1] notin {DirSep, AltSep}: - result.add canonDirSep - result.add substr(x, b[0], b[1]) - inc state, 2 - -proc canon(x: string): string = - # - Turn multiple slashes into single slashes. - # - Resolve '/foo/../bar' to '/bar'. - # - Remove './' from the path. - result = newStringOfCap(x.len) - var state = 0 - canon(x, result, state) - -when FileSystemCaseSensitive: - template `!=?`(a, b: char): bool = toLowerAscii(a) != toLowerAscii(b) -else: - template `!=?`(a, b: char): bool = a != b - -proc relativeTo(full, base: string; sep = canonDirSep): string = - if full.len == 0: return "" - var f, b: PathIter - var ff = (0, -1) - var bb = (0, -1) # (int, int) - result = newStringOfCap(full.len) - # skip the common prefix: - while f.hasNext(full) and b.hasNext(base): - ff = next(f, full) - bb = next(b, base) - let diff = ff[1] - ff[0] - if diff != bb[1] - bb[0]: break - var same = true - for i in 0..diff: - if full[i + ff[0]] !=? base[i + bb[0]]: - same = false - break - if not same: break - ff = (0, -1) - bb = (0, -1) - # for i in 0..diff: - # result.add base[i + bb[0]] - - # /foo/bar/xxx/ -- base - # /foo/bar/baz -- full path - # ../baz - # every directory that is in 'base', needs to add '..' 
- while true: - if bb[1] >= bb[0]: - if result.len > 0 and result[^1] != sep: - result.add sep - result.add ".." - if not b.hasNext(base): break - bb = b.next(base) - - # add the rest of 'full': - while true: - if ff[1] >= ff[0]: - if result.len > 0 and result[^1] != sep: - result.add sep - for i in 0..ff[1] - ff[0]: - result.add full[i + ff[0]] - if not f.hasNext(full): break - ff = f.next(full) - when true: - proc eqImpl(x, y: string): bool = - when FileSystemCaseSensitive: - result = cmpIgnoreCase(canon x, canon y) == 0 - else: - result = canon(x) == canon(y) + proc eqImpl(x, y: string): bool {.inline.} = + result = cmpPaths(x, y) == 0 proc `==`*(x, y: AbsoluteFile): bool = eqImpl(x.string, y.string) proc `==`*(x, y: AbsoluteDir): bool = eqImpl(x.string, y.string) @@ -180,20 +58,20 @@ when true: assert(not isAbsolute(f.string)) result = AbsoluteFile newStringOfCap(base.string.len + f.string.len) var state = 0 - canon(base.string, result.string, state) - canon(f.string, result.string, state) + addNormalizePath(base.string, result.string, state) + addNormalizePath(f.string, result.string, state) proc `/`*(base: AbsoluteDir; f: RelativeDir): AbsoluteDir = #assert isAbsolute(base.string) assert(not isAbsolute(f.string)) result = AbsoluteDir newStringOfCap(base.string.len + f.string.len) var state = 0 - canon(base.string, result.string, state) - canon(f.string, result.string, state) + addNormalizePath(base.string, result.string, state) + addNormalizePath(f.string, result.string, state) proc relativeTo*(fullPath: AbsoluteFile, baseFilename: AbsoluteDir; - sep = canonDirSep): RelativeFile = - RelativeFile(relativeTo(fullPath.string, baseFilename.string, sep)) + sep = DirSep): RelativeFile = + RelativeFile(relativePath(fullPath.string, baseFilename.string, sep)) proc toAbsolute*(file: string; base: AbsoluteDir): AbsoluteFile = if isAbsolute(file): result = AbsoluteFile(file) @@ -208,37 +86,8 @@ when true: proc writeFile*(x: AbsoluteFile; content: string) {.borrow.} when 
isMainModule: - doAssert canon"/foo/../bar" == "/bar" - doAssert canon"foo/../bar" == "bar" - - doAssert canon"/f/../bar///" == "/bar" - doAssert canon"f/..////bar" == "bar" - - doAssert canon"../bar" == "../bar" - doAssert canon"/../bar" == "/../bar" - - doAssert canon("foo/../../bar/") == "../bar" - doAssert canon("./bla/blob/") == "bla/blob" - doAssert canon(".hiddenFile") == ".hiddenFile" - doAssert canon("./bla/../../blob/./zoo.nim") == "../blob/zoo.nim" - - doAssert canon("C:/file/to/this/long") == "C:/file/to/this/long" - doAssert canon("") == "" - doAssert canon("foobar") == "foobar" - doAssert canon("f/////////") == "f" - - doAssert relativeTo("/foo/bar//baz.nim", "/foo") == "bar/baz.nim" - - doAssert relativeTo("/Users/me/bar/z.nim", "/Users/other/bad") == "../../me/bar/z.nim" - - doAssert relativeTo("/Users/me/bar/z.nim", "/Users/other") == "../me/bar/z.nim" - doAssert relativeTo("/Users///me/bar//z.nim", "//Users/") == "me/bar/z.nim" - doAssert relativeTo("/Users/me/bar/z.nim", "/Users/me") == "bar/z.nim" - doAssert relativeTo("", "/users/moo") == "" - doAssert relativeTo("foo", "") == "foo" - doAssert AbsoluteDir"/Users/me///" / RelativeFile"z.nim" == AbsoluteFile"/Users/me/z.nim" - doAssert relativeTo("/foo/bar.nim", "/foo/") == "bar.nim" + doAssert relativePath("/foo/bar.nim", "/foo/", '/') == "bar.nim" when isMainModule and defined(windows): let nasty = string(AbsoluteDir(r"C:\Users\rumpf\projects\nim\tests\nimble\nimbleDir\linkedPkgs\pkgB-#head\../../simplePkgs/pkgB-#head/") / RelativeFile"pkgA/module.nim") diff --git a/lib/pure/includes/osseps.nim b/lib/pure/includes/osseps.nim new file mode 100644 index 0000000000..9a79fe303d --- /dev/null +++ b/lib/pure/includes/osseps.nim @@ -0,0 +1,130 @@ +# Include file that implements 'DirSep' and friends. Do not import this when +# you also import ``os.nim``! 
+ +const + doslikeFileSystem* = defined(windows) or defined(OS2) or defined(DOS) + +when defined(Nimdoc): # only for proper documentation: + const + CurDir* = '.' + ## The constant character used by the operating system to refer to the + ## current directory. + ## + ## For example: '.' for POSIX or ':' for the classic Macintosh. + + ParDir* = ".." + ## The constant string used by the operating system to refer to the + ## parent directory. + ## + ## For example: ".." for POSIX or "::" for the classic Macintosh. + + DirSep* = '/' + ## The character used by the operating system to separate pathname + ## components, for example, '/' for POSIX or ':' for the classic + ## Macintosh. + + AltSep* = '/' + ## An alternative character used by the operating system to separate + ## pathname components, or the same as `DirSep` if only one separator + ## character exists. This is set to '/' on Windows systems + ## where `DirSep` is a backslash. + + PathSep* = ':' + ## The character conventionally used by the operating system to separate + ## search path components (as in PATH), such as ':' for POSIX + ## or ';' for Windows. + + FileSystemCaseSensitive* = true + ## true if the file system is case sensitive, false otherwise. Used by + ## `cmpPaths` to compare filenames properly. + + ExeExt* = "" + ## The file extension of native executables. For example: + ## "" for POSIX, "exe" on Windows. + + ScriptExt* = "" + ## The file extension of a script file. For example: "" for POSIX, + ## "bat" on Windows. + + DynlibFormat* = "lib$1.so" + ## The format string to turn a filename into a `DLL`:idx: file (also + ## called `shared object`:idx: on some operating systems). 
+ +elif defined(macos): + const + CurDir* = ':' + ParDir* = "::" + DirSep* = ':' + AltSep* = Dirsep + PathSep* = ',' + FileSystemCaseSensitive* = false + ExeExt* = "" + ScriptExt* = "" + DynlibFormat* = "$1.dylib" + + # MacOS paths + # =========== + # MacOS directory separator is a colon ":" which is the only character not + # allowed in filenames. + # + # A path containing no colon or which begins with a colon is a partial + # path. + # E.g. ":kalle:petter" ":kalle" "kalle" + # + # All other paths are full (absolute) paths. E.g. "HD:kalle:" "HD:" + # When generating paths, one is safe if one ensures that all partial paths + # begin with a colon, and all full paths end with a colon. + # In full paths the first name (e.g. HD above) is the name of a mounted + # volume. + # These names are not unique, because, for instance, two diskettes with the + # same names could be inserted. This means that paths on MacOS are not + # waterproof. In case of equal names the first volume found will do. + # Two colons "::" are the relative path to the parent. Three is to the + # grandparent etc. +elif doslikeFileSystem: + const + CurDir* = '.' + ParDir* = ".." + DirSep* = '\\' # separator within paths + AltSep* = '/' + PathSep* = ';' # separator between paths + FileSystemCaseSensitive* = false + ExeExt* = "exe" + ScriptExt* = "bat" + DynlibFormat* = "$1.dll" +elif defined(PalmOS) or defined(MorphOS): + const + DirSep* = '/' + AltSep* = Dirsep + PathSep* = ';' + ParDir* = ".." + FileSystemCaseSensitive* = false + ExeExt* = "" + ScriptExt* = "" + DynlibFormat* = "$1.prc" +elif defined(RISCOS): + const + DirSep* = '.' + AltSep* = '.' + ParDir* = ".." # is this correct? + PathSep* = ',' + FileSystemCaseSensitive* = true + ExeExt* = "" + ScriptExt* = "" + DynlibFormat* = "lib$1.so" +else: # UNIX-like operating system + const + CurDir* = '.' + ParDir* = ".." 
+ DirSep* = '/' + AltSep* = DirSep + PathSep* = ':' + FileSystemCaseSensitive* = when defined(macosx): false else: true + ExeExt* = "" + ScriptExt* = "" + DynlibFormat* = when defined(macosx): "lib$1.dylib" else: "lib$1.so" + +const + ExtSep* = '.' + ## The character which separates the base filename from the extension; + ## for example, the '.' in ``os.nim``. diff --git a/lib/pure/os.nim b/lib/pure/os.nim index 31610a59e5..1646d7c7a1 100644 --- a/lib/pure/os.nim +++ b/lib/pure/os.nim @@ -17,7 +17,7 @@ include "system/inclrtl" import - strutils + strutils, pathnorm when defined(nimscript): discard @@ -51,133 +51,7 @@ type OSErrorCode* = distinct int32 ## Specifies an OS Error Code. -const - doslikeFileSystem* = defined(windows) or defined(OS2) or defined(DOS) - -when defined(Nimdoc): # only for proper documentation: - const - CurDir* = '.' - ## The constant string used by the operating system to refer to the - ## current directory. - ## - ## For example: '.' for POSIX or ':' for the classic Macintosh. - - ParDir* = ".." - ## The constant string used by the operating system to refer to the - ## parent directory. - ## - ## For example: ".." for POSIX or "::" for the classic Macintosh. - - DirSep* = '/' - ## The character used by the operating system to separate pathname - ## components, for example, '/' for POSIX or ':' for the classic - ## Macintosh. - - AltSep* = '/' - ## An alternative character used by the operating system to separate - ## pathname components, or the same as `DirSep` if only one separator - ## character exists. This is set to '/' on Windows systems - ## where `DirSep` is a backslash. - - PathSep* = ':' - ## The character conventionally used by the operating system to separate - ## search patch components (as in PATH), such as ':' for POSIX - ## or ';' for Windows. - - FileSystemCaseSensitive* = true - ## true if the file system is case sensitive, false otherwise. Used by - ## `cmpPaths` to compare filenames properly. 
- - ExeExt* = "" - ## The file extension of native executables. For example: - ## "" for POSIX, "exe" on Windows. - - ScriptExt* = "" - ## The file extension of a script file. For example: "" for POSIX, - ## "bat" on Windows. - - DynlibFormat* = "lib$1.so" - ## The format string to turn a filename into a `DLL`:idx: file (also - ## called `shared object`:idx: on some operating systems). - -elif defined(macos): - const - CurDir* = ':' - ParDir* = "::" - DirSep* = ':' - AltSep* = Dirsep - PathSep* = ',' - FileSystemCaseSensitive* = false - ExeExt* = "" - ScriptExt* = "" - DynlibFormat* = "$1.dylib" - - # MacOS paths - # =========== - # MacOS directory separator is a colon ":" which is the only character not - # allowed in filenames. - # - # A path containing no colon or which begins with a colon is a partial - # path. - # E.g. ":kalle:petter" ":kalle" "kalle" - # - # All other paths are full (absolute) paths. E.g. "HD:kalle:" "HD:" - # When generating paths, one is safe if one ensures that all partial paths - # begin with a colon, and all full paths end with a colon. - # In full paths the first name (e g HD above) is the name of a mounted - # volume. - # These names are not unique, because, for instance, two diskettes with the - # same names could be inserted. This means that paths on MacOS are not - # waterproof. In case of equal names the first volume found will do. - # Two colons "::" are the relative path to the parent. Three is to the - # grandparent etc. -elif doslikeFileSystem: - const - CurDir* = '.' - ParDir* = ".." - DirSep* = '\\' # seperator within paths - AltSep* = '/' - PathSep* = ';' # seperator between paths - FileSystemCaseSensitive* = false - ExeExt* = "exe" - ScriptExt* = "bat" - DynlibFormat* = "$1.dll" -elif defined(PalmOS) or defined(MorphOS): - const - DirSep* = '/' - AltSep* = Dirsep - PathSep* = ';' - ParDir* = ".." 
- FileSystemCaseSensitive* = false - ExeExt* = "" - ScriptExt* = "" - DynlibFormat* = "$1.prc" -elif defined(RISCOS): - const - DirSep* = '.' - AltSep* = '.' - ParDir* = ".." # is this correct? - PathSep* = ',' - FileSystemCaseSensitive* = true - ExeExt* = "" - ScriptExt* = "" - DynlibFormat* = "lib$1.so" -else: # UNIX-like operating system - const - CurDir* = '.' - ParDir* = ".." - DirSep* = '/' - AltSep* = DirSep - PathSep* = ':' - FileSystemCaseSensitive* = when defined(macosx): false else: true - ExeExt* = "" - ScriptExt* = "" - DynlibFormat* = when defined(macosx): "lib$1.dylib" else: "lib$1.so" - -const - ExtSep* = '.' - ## The character which separates the base filename from the extension; - ## for example, the '.' in ``os.nim``. +include "includes/osseps" proc normalizePathEnd(path: var string, trailingSep = false) = ## ensures ``path`` has exactly 0 or 1 trailing `DirSep`, depending on @@ -192,7 +66,7 @@ proc normalizePathEnd(path: var string, trailingSep = false) = path.setLen(i) # foo => foo/ path.add DirSep - elif i>0: + elif i > 0: # foo// => foo path.setLen(i) else: @@ -228,29 +102,37 @@ proc joinPath*(head, tail: string): string {. 
## assert joinPath("", "lib") == "lib" ## assert joinPath("", "/lib") == "/lib" ## assert joinPath("usr/", "/lib") == "usr/lib" - if len(head) == 0: - result = tail - elif head[len(head)-1] in {DirSep, AltSep}: - if tail.len > 0 and tail[0] in {DirSep, AltSep}: - result = head & substr(tail, 1) + result = newStringOfCap(head.len + tail.len) + var state = 0 + addNormalizePath(head, result, state, DirSep) + addNormalizePath(tail, result, state, DirSep) + when false: + if len(head) == 0: + result = tail + elif head[len(head)-1] in {DirSep, AltSep}: + if tail.len > 0 and tail[0] in {DirSep, AltSep}: + result = head & substr(tail, 1) + else: + result = head & tail else: - result = head & tail - else: - if tail.len > 0 and tail[0] in {DirSep, AltSep}: - result = head & tail - else: - result = head & DirSep & tail + if tail.len > 0 and tail[0] in {DirSep, AltSep}: + result = head & tail + else: + result = head & DirSep & tail proc joinPath*(parts: varargs[string]): string {.noSideEffect, rtl, extern: "nos$1OpenArray".} = ## The same as `joinPath(head, tail)`, but works with any number of ## directory parts. You need to pass at least one element or the proc ## will assert in debug builds and crash on release builds. - result = parts[0] - for i in 1..high(parts): - result = joinPath(result, parts[i]) + var estimatedLen = 0 + for p in parts: estimatedLen += p.len + result = newStringOfCap(estimatedLen) + var state = 0 + for i in 0..high(parts): + addNormalizePath(parts[i], result, state, DirSep) -proc `/` * (head, tail: string): string {.noSideEffect.} = +proc `/`*(head, tail: string): string {.noSideEffect.} = ## The same as ``joinPath(head, tail)`` ## ## Here are some examples for Unix: @@ -287,6 +169,71 @@ proc splitPath*(path: string): tuple[head, tail: string] {. 
result.head = "" result.tail = path +when FileSystemCaseSensitive: + template `!=?`(a, b: char): bool = a != b +else: + template `!=?`(a, b: char): bool = toLowerAscii(a) != toLowerAscii(b) + +proc relativePath*(path, base: string; sep = DirSep): string {. + noSideEffect, rtl, extern: "nos$1", raises: [].} = + ## Converts `path` to a path relative to `base`. + ## The `sep` is used for the path normalizations, this can be useful to + ## ensure the relative path only contains '/' so that it can be used for + ## URL constructions. + runnableExamples: + doAssert relativePath("/Users/me/bar/z.nim", "/Users/other/bad", '/') == "../../me/bar/z.nim" + doAssert relativePath("/Users/me/bar/z.nim", "/Users/other", '/') == "../me/bar/z.nim" + doAssert relativePath("/Users///me/bar//z.nim", "//Users/", '/') == "me/bar/z.nim" + doAssert relativePath("/Users/me/bar/z.nim", "/Users/me", '/') == "bar/z.nim" + doAssert relativePath("", "/users/moo", '/') == "" + + + # Todo: If on Windows, path and base do not agree on the drive letter, + # return `path` as is. + if path.len == 0: return "" + var f, b: PathIter + var ff = (0, -1) + var bb = (0, -1) # (int, int) + result = newStringOfCap(path.len) + # skip the common prefix (`!=?` ignores case only on case insensitive file systems): + while f.hasNext(path) and b.hasNext(base): + ff = next(f, path) + bb = next(b, base) + let diff = ff[1] - ff[0] + if diff != bb[1] - bb[0]: break + var same = true + for i in 0..diff: + if path[i + ff[0]] !=? base[i + bb[0]]: + same = false + break + if not same: break + ff = (0, -1) + bb = (0, -1) + # for i in 0..diff: + # result.add base[i + bb[0]] + + # /foo/bar/xxx/ -- base + # /foo/bar/baz -- path + # ../baz + # every directory that is in 'base', needs to add '..' + while true: + if bb[1] >= bb[0]: + if result.len > 0 and result[^1] != sep: + result.add sep + result.add ".." 
+ if not b.hasNext(base): break + bb = b.next(base) + + # add the rest of 'path': + while true: + if ff[1] >= ff[0]: + if result.len > 0 and result[^1] != sep: + result.add sep + for i in 0..ff[1] - ff[0]: + result.add path[i + ff[0]] + if not f.hasNext(path): break + ff = f.next(path) + proc parentDirPos(path: string): int = var q = 1 if len(path) >= 1 and path[len(path)-1] in {DirSep, AltSep}: q = 2 @@ -478,14 +425,17 @@ proc cmpPaths*(pathA, pathB: string): int {. doAssert cmpPaths("foo", "Foo") == 0 elif defined(posix): doAssert cmpPaths("foo", "Foo") > 0 + + let a = normalizePath(pathA) + let b = normalizePath(pathB) if FileSystemCaseSensitive: - result = cmp(pathA, pathB) + result = cmp(a, b) else: when defined(nimscript): - result = cmpic(pathA, pathB) + result = cmpic(a, b) elif defined(nimdoc): discard else: - result = cmpIgnoreCase(pathA, pathB) + result = cmpIgnoreCase(a, b) proc isAbsolute*(path: string): bool {.rtl, noSideEffect, extern: "nos$1".} = ## Checks whether a given `path` is absolute. @@ -523,12 +473,10 @@ proc unixToNativePath*(path: string, drive=""): string {. ## which drive label to use during absolute path conversion. ## `drive` defaults to the drive of the current working directory, and is ## ignored on systems that do not have a concept of "drives". - when defined(unix): result = path else: - if path.len == 0: - return "" + if path.len == 0: return "" var start: int if path[0] == '/': diff --git a/lib/pure/pathnorm.nim b/lib/pure/pathnorm.nim new file mode 100644 index 0000000000..696f6b2ef8 --- /dev/null +++ b/lib/pure/pathnorm.nim @@ -0,0 +1,90 @@ +# +# +# Nim's Runtime Library +# (c) Copyright 2018 Andreas Rumpf +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## OS-Path normalization. Used by ``os.nim`` but also +## generally useful for dealing with paths. Note that this module +## does not provide a stable API. 
+ +# Yes, this uses import here, not include so that +# we don't end up exporting these symbols from pathnorm and os: +import "includes/osseps" + +type + PathIter* = object + i, prev: int + notFirst: bool + +proc hasNext*(it: PathIter; x: string): bool = + it.i < x.len + +proc next*(it: var PathIter; x: string): (int, int) = + it.prev = it.i + if not it.notFirst and x[it.i] in {DirSep, AltSep}: + # absolute path: + inc it.i + else: + while it.i < x.len and x[it.i] notin {DirSep, AltSep}: inc it.i + if it.i > it.prev: + result = (it.prev, it.i-1) + elif hasNext(it, x): + result = next(it, x) + + # skip all separators: + while it.i < x.len and x[it.i] in {DirSep, AltSep}: inc it.i + it.notFirst = true + +iterator dirs(x: string): (int, int) = + var it: PathIter + while hasNext(it, x): yield next(it, x) + +proc isDot(x: string; bounds: (int, int)): bool = + bounds[1] == bounds[0] and x[bounds[0]] == '.' + +proc isDotDot(x: string; bounds: (int, int)): bool = + bounds[1] == bounds[0] + 1 and x[bounds[0]] == '.' and x[bounds[0]+1] == '.' + +proc isSlash(x: string; bounds: (int, int)): bool = + bounds[1] == bounds[0] and x[bounds[0]] in {DirSep, AltSep} + +proc addNormalizePath*(x: string; result: var string; state: var int; dirSep = DirSep) = + ## Low level proc: appends the normalized components of `x` to `result`; `state` must be carried over between calls that build one path. + + # state: 0th bit set if isAbsolute path. Other bits count + # the number of path components. + for b in dirs(x): + if (state shr 1 == 0) and isSlash(x, b): + if result.len == 0 or result[^1] notin {DirSep, AltSep}: result.add dirSep + state = state or 1 + elif result.len > (state and 1) and isDotDot(x, b) and not (result.len >= 2 and result[^1] == '.' and result[^2] == '.' and (result.len == 2 or result[^3] in {DirSep, AltSep})): + var d = result.len + # f/.. => drop 'f'; a kept trailing '..' is never dropped, so '../..' stays intact + while (d-1) > (state and 1) and result[d-1] notin {DirSep, AltSep}: + dec d + if d > 0: setLen(result, d-1) + elif isDot(x, b): + discard "discard the dot" + elif b[1] >= b[0] and not isSlash(x, b): + if result.len > 0 and result[^1] notin {DirSep, AltSep}: + result.add dirSep + result.add substr(x, b[0], b[1]) + inc state, 2 + +proc normalizePath*(path: string; dirSep = DirSep): string = + ## Example: + ## + ## .. 
code-block:: nim + ## assert normalizePath("./foo//bar/../baz") == "foo/baz" + ## + ## + ## - Turns multiple slashes into single slashes. + ## - Resolves '/foo/../bar' to '/bar'. + ## - Removes './' from the path. + result = newStringOfCap(path.len) + var state = 0 + addNormalizePath(path, result, state, dirSep) diff --git a/tests/stdlib/tospaths.nim b/tests/stdlib/tospaths.nim index bee9bab76d..563584c4c4 100644 --- a/tests/stdlib/tospaths.nim +++ b/tests/stdlib/tospaths.nim @@ -1,63 +1,94 @@ -discard """ - output: "" -""" -# test the ospaths module - -import os - -doAssert unixToNativePath("") == "" -doAssert unixToNativePath(".") == $CurDir -doAssert unixToNativePath("..") == $ParDir -doAssert isAbsolute(unixToNativePath("/")) -doAssert isAbsolute(unixToNativePath("/", "a")) -doAssert isAbsolute(unixToNativePath("/a")) -doAssert isAbsolute(unixToNativePath("/a", "a")) -doAssert isAbsolute(unixToNativePath("/a/b")) -doAssert isAbsolute(unixToNativePath("/a/b", "a")) -doAssert unixToNativePath("a/b") == joinPath("a", "b") - -when defined(macos): - doAssert unixToNativePath("./") == ":" - doAssert unixToNativePath("./abc") == ":abc" - doAssert unixToNativePath("../abc") == "::abc" - doAssert unixToNativePath("../../abc") == ":::abc" - doAssert unixToNativePath("/abc", "a") == "abc" - doAssert unixToNativePath("/abc/def", "a") == "abc:def" -elif doslikeFileSystem: - doAssert unixToNativePath("./") == ".\\" - doAssert unixToNativePath("./abc") == ".\\abc" - doAssert unixToNativePath("../abc") == "..\\abc" - doAssert unixToNativePath("../../abc") == "..\\..\\abc" - doAssert unixToNativePath("/abc", "a") == "a:\\abc" - doAssert unixToNativePath("/abc/def", "a") == "a:\\abc\\def" -else: - #Tests for unix - doAssert unixToNativePath("./") == "./" - doAssert unixToNativePath("./abc") == "./abc" - doAssert unixToNativePath("../abc") == "../abc" - doAssert unixToNativePath("../../abc") == "../../abc" - doAssert unixToNativePath("/abc", "a") == "/abc" - doAssert 
unixToNativePath("/abc/def", "a") == "/abc/def" - -block extractFilenameTest: - doAssert extractFilename("") == "" - when defined(posix): - doAssert extractFilename("foo/bar") == "bar" - doAssert extractFilename("foo/bar.txt") == "bar.txt" - doAssert extractFilename("foo/") == "" - doAssert extractFilename("/") == "" - when doslikeFileSystem: - doAssert extractFilename(r"foo\bar") == "bar" - doAssert extractFilename(r"foo\bar.txt") == "bar.txt" - doAssert extractFilename(r"foo\") == "" - doAssert extractFilename(r"C:\") == "" - -block lastPathPartTest: - doAssert lastPathPart("") == "" - when defined(posix): - doAssert lastPathPart("foo/bar.txt") == "bar.txt" - doAssert lastPathPart("foo/") == "foo" - doAssert lastPathPart("/") == "" - when doslikeFileSystem: - doAssert lastPathPart(r"foo\bar.txt") == "bar.txt" - doAssert lastPathPart(r"foo\") == "foo" +discard """ + output: "" +""" +# test the ospaths module + +import os, pathnorm + +doAssert unixToNativePath("") == "" +doAssert unixToNativePath(".") == $CurDir +doAssert unixToNativePath("..") == $ParDir +doAssert isAbsolute(unixToNativePath("/")) +doAssert isAbsolute(unixToNativePath("/", "a")) +doAssert isAbsolute(unixToNativePath("/a")) +doAssert isAbsolute(unixToNativePath("/a", "a")) +doAssert isAbsolute(unixToNativePath("/a/b")) +doAssert isAbsolute(unixToNativePath("/a/b", "a")) +doAssert unixToNativePath("a/b") == joinPath("a", "b") + +when defined(macos): + doAssert unixToNativePath("./") == ":" + doAssert unixToNativePath("./abc") == ":abc" + doAssert unixToNativePath("../abc") == "::abc" + doAssert unixToNativePath("../../abc") == ":::abc" + doAssert unixToNativePath("/abc", "a") == "abc" + doAssert unixToNativePath("/abc/def", "a") == "abc:def" +elif doslikeFileSystem: + doAssert unixToNativePath("./") == ".\\" + doAssert unixToNativePath("./abc") == ".\\abc" + doAssert unixToNativePath("../abc") == "..\\abc" + doAssert unixToNativePath("../../abc") == "..\\..\\abc" + doAssert unixToNativePath("/abc", 
"a") == "a:\\abc" + doAssert unixToNativePath("/abc/def", "a") == "a:\\abc\\def" +else: + #Tests for unix + doAssert unixToNativePath("./") == "./" + doAssert unixToNativePath("./abc") == "./abc" + doAssert unixToNativePath("../abc") == "../abc" + doAssert unixToNativePath("../../abc") == "../../abc" + doAssert unixToNativePath("/abc", "a") == "/abc" + doAssert unixToNativePath("/abc/def", "a") == "/abc/def" + +block extractFilenameTest: + doAssert extractFilename("") == "" + when defined(posix): + doAssert extractFilename("foo/bar") == "bar" + doAssert extractFilename("foo/bar.txt") == "bar.txt" + doAssert extractFilename("foo/") == "" + doAssert extractFilename("/") == "" + when doslikeFileSystem: + doAssert extractFilename(r"foo\bar") == "bar" + doAssert extractFilename(r"foo\bar.txt") == "bar.txt" + doAssert extractFilename(r"foo\") == "" + doAssert extractFilename(r"C:\") == "" + +block lastPathPartTest: + doAssert lastPathPart("") == "" + when defined(posix): + doAssert lastPathPart("foo/bar.txt") == "bar.txt" + doAssert lastPathPart("foo/") == "foo" + doAssert lastPathPart("/") == "" + when doslikeFileSystem: + doAssert lastPathPart(r"foo\bar.txt") == "bar.txt" + doAssert lastPathPart(r"foo\") == "foo" + +template canon(x): untyped = normalizePath(x, '/') +doAssert canon"/foo/../bar" == "/bar" +doAssert canon"foo/../bar" == "bar" + +doAssert canon"/f/../bar///" == "/bar" +doAssert canon"f/..////bar" == "bar" + +doAssert canon"../bar" == "../bar" +doAssert canon"/../bar" == "/../bar" + +doAssert canon("foo/../../bar/") == "../bar" +doAssert canon("./bla/blob/") == "bla/blob" +doAssert canon(".hiddenFile") == ".hiddenFile" +doAssert canon("./bla/../../blob/./zoo.nim") == "../blob/zoo.nim" + +doAssert canon("C:/file/to/this/long") == "C:/file/to/this/long" +doAssert canon("") == "" +doAssert canon("foobar") == "foobar" +doAssert canon("f/////////") == "f" + +doAssert relativePath("/foo/bar//baz.nim", "/foo", '/') == "bar/baz.nim" +doAssert 
normalizePath("./foo//bar/../baz", '/') == "foo/baz" + +doAssert relativePath("/Users/me/bar/z.nim", "/Users/other/bad", '/') == "../../me/bar/z.nim" + +doAssert relativePath("/Users/me/bar/z.nim", "/Users/other", '/') == "../me/bar/z.nim" +doAssert relativePath("/Users///me/bar//z.nim", "//Users/", '/') == "me/bar/z.nim" +doAssert relativePath("/Users/me/bar/z.nim", "/Users/me", '/') == "bar/z.nim" +doAssert relativePath("", "/users/moo", '/') == "" +doAssert relativePath("foo", "", '/') == "foo"