From 0fcf1af7654aa4ac9934168ec84113a95c57a608 Mon Sep 17 00:00:00 2001
From: Timothee Cour
Date: Sat, 20 Feb 2021 08:58:54 -0800
Subject: [PATCH] uri: document removeDotSegments, add tests, show failure modes (#17064)

* uri: document removeDotSegments, add tests, show failure modes

* address comments
---
 lib/pure/uri.nim      | 32 +++++++++++++++++++++++++++++++-
 tests/stdlib/turi.nim |  9 +--------
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/lib/pure/uri.nim b/lib/pure/uri.nim
index 4b25ba084c..920529ecff 100644
--- a/lib/pure/uri.nim
+++ b/lib/pure/uri.nim
@@ -339,9 +339,17 @@ func parseUri*(uri: string): Uri =
   parseUri(uri, result)
 
 func removeDotSegments(path: string): string =
+  ## Collapses `..` and `.` in `path` in a similar way as done in `os.normalizedPath`
+  ## Caution: this is buggy.
+  runnableExamples:
+    assert removeDotSegments("a1/a2/../a3/a4/a5/./a6/a7/.//./") == "a1/a3/a4/a5/a6/a7/"
+    assert removeDotSegments("http://www.ai.") == "http://www.ai."
+  # xxx adapt or reuse `pathnorm.normalizePath(path, '/')` to make this more reliable, but
+  # taking into account url specificities such as not collapsing leading `//` in scheme
+  # `https://`. see `turi` for failing tests.
   if path.len == 0: return ""
   var collection: seq[string] = @[]
-  let endsWithSlash = path[path.len-1] == '/'
+  let endsWithSlash = path.endsWith '/'
   var i = 0
   var currentSegment = ""
   while i < path.len:
@@ -547,3 +555,25 @@ proc getDataUri*(data, mime: string, encoding = "utf-8"): string {.since: (1, 3)
   runnableExamples: static: doAssert getDataUri("Nim", "text/plain") == "data:text/plain;charset=utf-8;base64,Tmlt"
   assert encoding.len > 0 and mime.len > 0 # Must *not* be URL-Safe, see RFC-2397
   result = "data:" & mime & ";charset=" & encoding & ";base64," & base64.encode(data)
+
+when isMainModule and defined(testing):
+  # needed (pending https://github.com/nim-lang/Nim/pull/11865) because
+  # `removeDotSegments` is private, the other tests are in `turi`.
+  block: # removeDotSegments
+    # `removeDotSegments` is exported for -d:testing only
+    doAssert removeDotSegments("/foo/bar/baz") == "/foo/bar/baz"
+    doAssert removeDotSegments("") == "" # empty test
+    doAssert removeDotSegments(".") == "." # trailing period
+    doAssert removeDotSegments("a1/a2/../a3/a4/a5/./a6/a7/././") == "a1/a3/a4/a5/a6/a7/"
+    doAssert removeDotSegments("https://a1/a2/../a3/a4/a5/./a6/a7/././") == "https://a1/a3/a4/a5/a6/a7/"
+    doAssert removeDotSegments("http://a1/a2") == "http://a1/a2"
+    doAssert removeDotSegments("http://www.ai.") == "http://www.ai."
+    when false: # xxx these cases are buggy
+      # this should work, refs https://webmasters.stackexchange.com/questions/73934/how-can-urls-have-a-dot-at-the-end-e-g-www-bla-de
+      doAssert removeDotSegments("http://www.ai./") == "http://www.ai./" # fails
+      echo removeDotSegments("http://www.ai./") # http://www.ai/
+      echo removeDotSegments("a/b.../c") # b.c
+      echo removeDotSegments("a/b../c") # bc
+      echo removeDotSegments("a/.../c") # .c
+      echo removeDotSegments("a//../b") # a/b
+      echo removeDotSegments("a/b/c//") # a/b/c//
diff --git a/tests/stdlib/turi.nim b/tests/stdlib/turi.nim
index 07f3f17f8a..1a6f375209 100644
--- a/tests/stdlib/turi.nim
+++ b/tests/stdlib/turi.nim
@@ -1,10 +1,8 @@
 discard """
   targets: "c js"
-  joinable: false # because of `include uri`
 """
 
-# import std/uri # pending https://github.com/nim-lang/Nim/pull/11865
-include uri # because of `removeDotSegments`
+import std/uri
 from std/sequtils import toSeq
 
 template main() =
@@ -171,11 +169,6 @@ template main() =
     let test = parseUri("http://example.com/foo/") / "/bar/asd"
     doAssert test.path == "/foo/bar/asd"
 
-  block: # removeDotSegments
-    doAssert removeDotSegments("/foo/bar/baz") == "/foo/bar/baz"
-    doAssert removeDotSegments("") == "" # empty test
-    doAssert removeDotSegments(".") == "." # trailing period
-
   block: # bug #3207
     doAssert parseUri("http://qq/1").combine(parseUri("https://qqq")).`$` == "https://qqq"
 