Fix parseUri to sanitize urls containing ASCII newline or tab (#17967)

* Fix parseUri to sanitize urls containing ASCII newline or tab * Fix ups based on review Co-authored-by: Timothee Cour <timothee.cour2@gmail.com> * Additional fix ups based on review - Avoid unnecessary `removeUnsafeBytesFromUri` call if parseUri is strict - Move some parseUri tests to uri module test file Co-authored-by: Dominik Picheta <dominikpicheta@googlemail.com> * Update changelog Co-authored-by: Timothee Cour <timothee.cour2@gmail.com> Co-authored-by: Dominik Picheta <dominikpicheta@googlemail.com>
2026-02-12 14:23:45 +00:00 · 2021-05-09 20:24:00 +02:00
parent d84a3b10b5
commit f4dd95f3be
3 changed files with 42 additions and 3 deletions
--- a/changelog.md
+++ b/changelog.md
@@ -299,6 +299,7 @@

 - Added `copyWithin` [for `seq` and `array` for JavaScript targets](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/copyWithin).

+- Added optional `strict` argument to `parseUri` of `uri` module to raise a `UriParseError` if input contains newline or tab characters, or [remove them in non-strict case](https://url.spec.whatwg.org/#concept-basic-url-parser).

 ## Language changes

--- a/lib/pure/uri.nim
+++ b/lib/pure/uri.nim
@@ -51,6 +51,8 @@ type

  UriParseError* = object of ValueError

+# https://url.spec.whatwg.org/#concept-basic-url-parser
+const unsafeUrlBytesToRemove = {'\t', '\r', '\n'}

 proc uriParseError*(msg: string) {.noreturn.} =
  ## Raises a `UriParseError` exception with message `msg`.
@@ -261,7 +263,11 @@ func resetUri(uri: var Uri) =
    else:
      f = false

-func parseUri*(uri: string, result: var Uri) =
+func removeUnsafeBytesFromUri(uri: string): string =
+  for c in uri:
+    if c notin unsafeUrlBytesToRemove: result.add c
+
+func parseUri*(uri: string, result: var Uri, strict = true) =
  ## Parses a URI. The `result` variable will be cleared before.
  ##
  ## **See also:**
@@ -273,6 +279,26 @@ func parseUri*(uri: string, result: var Uri) =
    assert res.scheme == "https"
    assert res.hostname == "nim-lang.org"
    assert res.path == "/docs/manual.html"
+
+    # Non-strict
+    res = initUri()
+    parseUri("https://nim-lang\n.org\t/docs/", res, strict=false)
+    assert res.scheme == "https"
+    assert res.hostname == "nim-lang.org"
+    assert res.path == "/docs/"
+
+    # Strict
+    res = initUri()
+    doAssertRaises(UriParseError):
+      parseUri("https://nim-lang\n.org\t/docs/", res)
+
+  var uri = uri
+  if strict:
+    for c in uri:
+      if c in unsafeUrlBytesToRemove: uriParseError("Invalid uri '$#'" % uri)
+  else:
+    uri = removeUnsafeBytesFromUri(uri)
+
  resetUri(result)

  var i = 0
@@ -309,7 +335,7 @@ func parseUri*(uri: string, result: var Uri) =
  # Path
  parsePath(uri, i, result)

-func parseUri*(uri: string): Uri =
+func parseUri*(uri: string, strict = true): Uri =
  ## Parses a URI and returns it.
  ##
  ## **See also:**
@@ -320,7 +346,7 @@ func parseUri*(uri: string): Uri =
    assert res.password == "Password"
    assert res.scheme == "ftp"
  result = initUri()
-  parseUri(uri, result)
+  parseUri(uri, result, strict)

 func removeDotSegments(path: string): string =
  ## Collapses `..` and `.` in `path` in a similar way as done in `os.normalizedPath`
--- a/tests/stdlib/turi.nim
+++ b/tests/stdlib/turi.nim
@@ -141,6 +141,18 @@ template main() =
      doAssert test.port == ""
      doAssert test.path == "/foo/bar/baz.txt"

+    block: # Strict
+      doAssertRaises(UriParseError):
+        discard parseUri("https://nim-lang\n.org\t/docs/\nalert('msg\r\n')/?query\n=\tvalue#frag\nment")
+
+      # Non-strict would sanitize newline and tab characters from input
+      let test = parseUri("https://nim-lang\n.org\t/docs/\nalert('msg\r\n')/?query\n=\tvalue#frag\nment", strict=false)
+      assert test.scheme == "https"
+      assert test.hostname == "nim-lang.org"
+      assert test.path == "/docs/alert('msg')/"
+      assert test.query == "query=value"
+      assert test.anchor == "fragment"
+
  block: # combine
    block:
      let concat = combine(parseUri("http://google.com/foo/bar/"), parseUri("baz"))