Fix parseUri to sanitize urls containing ASCII newline or tab (#17967)

* Fix parseUri to sanitize urls containing ASCII newline or tab

* Fix ups based on review

Co-authored-by: Timothee Cour <timothee.cour2@gmail.com>

* Additional fix ups based on review

- Avoid unnecessary `removeUnsafeBytesFromUri` call if parseUri is strict
- Move some parseUri tests to uri module test file

Co-authored-by: Dominik Picheta <dominikpicheta@googlemail.com>

* Update changelog

Co-authored-by: Timothee Cour <timothee.cour2@gmail.com>
Co-authored-by: Dominik Picheta <dominikpicheta@googlemail.com>
This commit is contained in:
Beshr Kayali
2021-05-09 20:24:00 +02:00
committed by GitHub
parent d84a3b10b5
commit f4dd95f3be
3 changed files with 42 additions and 3 deletions

View File

@@ -299,6 +299,7 @@
- Added `copyWithin` [for `seq` and `array` for JavaScript targets](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/copyWithin).
- Added optional `strict` argument to `parseUri` of `uri` module to raise a `UriParseError` if input contains newline or tab characters, or [remove them in non-strict case](https://url.spec.whatwg.org/#concept-basic-url-parser).
## Language changes

View File

@@ -51,6 +51,8 @@ type
UriParseError* = object of ValueError
# https://url.spec.whatwg.org/#concept-basic-url-parser
const unsafeUrlBytesToRemove = {'\t', '\r', '\n'}
proc uriParseError*(msg: string) {.noreturn.} =
## Raises a `UriParseError` exception with message `msg`.
@@ -261,7 +263,11 @@ func resetUri(uri: var Uri) =
else:
f = false
func parseUri*(uri: string, result: var Uri) =
func removeUnsafeBytesFromUri(uri: string): string =
for c in uri:
if c notin unsafeUrlBytesToRemove: result.add c
func parseUri*(uri: string, result: var Uri, strict = true) =
## Parses a URI. The `result` variable will be cleared before.
##
## **See also:**
@@ -273,6 +279,26 @@ func parseUri*(uri: string, result: var Uri) =
assert res.scheme == "https"
assert res.hostname == "nim-lang.org"
assert res.path == "/docs/manual.html"
# Non-strict
res = initUri()
parseUri("https://nim-lang\n.org\t/docs/", res, strict=false)
assert res.scheme == "https"
assert res.hostname == "nim-lang.org"
assert res.path == "/docs/"
# Strict
res = initUri()
doAssertRaises(UriParseError):
parseUri("https://nim-lang\n.org\t/docs/", res)
var uri = uri
if strict:
for c in uri:
if c in unsafeUrlBytesToRemove: uriParseError("Invalid uri '$#'" % uri)
else:
uri = removeUnsafeBytesFromUri(uri)
resetUri(result)
var i = 0
@@ -309,7 +335,7 @@ func parseUri*(uri: string, result: var Uri) =
# Path
parsePath(uri, i, result)
func parseUri*(uri: string): Uri =
func parseUri*(uri: string, strict = true): Uri =
## Parses a URI and returns it.
##
## **See also:**
@@ -320,7 +346,7 @@ func parseUri*(uri: string): Uri =
assert res.password == "Password"
assert res.scheme == "ftp"
result = initUri()
parseUri(uri, result)
parseUri(uri, result, strict)
func removeDotSegments(path: string): string =
## Collapses `..` and `.` in `path` in a similar way as done in `os.normalizedPath`

View File

@@ -141,6 +141,18 @@ template main() =
doAssert test.port == ""
doAssert test.path == "/foo/bar/baz.txt"
block: # Strict
doAssertRaises(UriParseError):
discard parseUri("https://nim-lang\n.org\t/docs/\nalert('msg\r\n')/?query\n=\tvalue#frag\nment")
# Non-strict would sanitize newline and tab characters from input
let test = parseUri("https://nim-lang\n.org\t/docs/\nalert('msg\r\n')/?query\n=\tvalue#frag\nment", strict=false)
assert test.scheme == "https"
assert test.hostname == "nim-lang.org"
assert test.path == "/docs/alert('msg')/"
assert test.query == "query=value"
assert test.anchor == "fragment"
block: # combine
block:
let concat = combine(parseUri("http://google.com/foo/bar/"), parseUri("baz"))