Improve uri.parseQuery to never raise an error (#16647)

In case of malformed query string where there is `=` on the value, handle
this character as part of the value instead of throwing an error.

The following query string should no longer crash a program:

    key=value&key2=x=1

It will be interpreted as [("key", "value"), ("key2", "x=1")]

This is correct according to latest WhatWG's HTML5 specification
recarding the urlencoded parser:
https://url.spec.whatwg.org/#concept-urlencoded-parser

Older behavior can be restored using the -d:nimLegacyParseQueryStrict
flag.
This commit is contained in:
Mildred Ki'Lya
2021-01-12 13:42:05 +01:00
committed by GitHub
parent bb3c6d0797
commit 71db2be833
4 changed files with 38 additions and 27 deletions

View File

@@ -96,6 +96,16 @@
with other backends. see #9125. Use `-d:nimLegacyJsRound` for previous behavior.
- Added `socketstream` module that wraps sockets in the stream interface
- Changed the behavior of `uri.decodeQuery` when there are unencoded `=`
characters in the decoded values. Prior versions would raise an error. This is
no longer the case to comply with the HTML spec and other languages
implementations. Old behavior can be obtained with
`-d:nimLegacyParseQueryStrict`. `cgi.decodeData` which uses the same
underlying code is also updated the same way.
- Added `math.signbit`.
- Removed the optional `longestMatch` parameter of the `critbits._WithPrefix` iterators (it never worked reliably)

View File

@@ -84,11 +84,8 @@ proc getEncodedData(allowedMethods: set[RequestMethod]): string =
iterator decodeData*(data: string): tuple[key, value: TaintedString] =
## Reads and decodes CGI data and yields the (name, value) pairs the
## data consists of.
try:
for (key, value) in uri.decodeQuery(data):
yield (key, value)
except UriParseError as e:
cgiError(e.msg)
for (key, value) in uri.decodeQuery(data):
yield (key, value)
iterator decodeData*(allowedMethods: set[RequestMethod] =
{methodNone, methodPost, methodGet}): tuple[key, value: TaintedString] =
@@ -96,11 +93,8 @@ iterator decodeData*(allowedMethods: set[RequestMethod] =
## data consists of. If the client does not use a method listed in the
## `allowedMethods` set, a `CgiError` exception is raised.
let data = getEncodedData(allowedMethods)
try:
for (key, value) in uri.decodeQuery(data):
yield (key, value)
except UriParseError as e:
cgiError(e.msg)
for (key, value) in uri.decodeQuery(data):
yield (key, value)
proc readData*(allowedMethods: set[RequestMethod] =
{methodNone, methodPost, methodGet}): StringTableRef =

View File

@@ -161,22 +161,26 @@ func encodeQuery*(query: openArray[(string, string)], usePlus = true,
result.add(encodeUrl(val, usePlus))
iterator decodeQuery*(data: string): tuple[key, value: TaintedString] =
## Reads and decodes query string ``data`` and yields the (key, value) pairs the
## data consists of.
## Reads and decodes query string `data` and yields the `(key, value)` pairs
## the data consists of. If compiled with `-d:nimLegacyParseQueryStrict`, an
## error is raised when there is an unencoded `=` character in a decoded
## value, which was the behavior in Nim < 1.5.1
runnableExamples:
import std/sugar
let s = collect(newSeq):
for k, v in decodeQuery("foo=1&bar=2"): (k, v)
doAssert s == @[("foo", "1"), ("bar", "2")]
import std/sequtils
doAssert toSeq(decodeQuery("foo=1&bar=2=3")) == @[("foo", "1"), ("bar", "2=3")]
doAssert toSeq(decodeQuery("&a&=b&=&&")) == @[("", ""), ("a", ""), ("", "b"), ("", ""), ("", "")]
proc parseData(data: string, i: int, field: var string): int =
proc parseData(data: string, i: int, field: var string, sep: char): int =
result = i
while result < data.len:
case data[result]
let c = data[result]
case c
of '%': add(field, decodePercent(data, result))
of '+': add(field, ' ')
of '=', '&': break
else: add(field, data[result])
of '&': break
else:
if c == sep: break
else: add(field, data[result])
inc(result)
var i = 0
@@ -185,16 +189,20 @@ iterator decodeQuery*(data: string): tuple[key, value: TaintedString] =
# decode everything in one pass:
while i < data.len:
setLen(name, 0) # reuse memory
i = parseData(data, i, name)
i = parseData(data, i, name, '=')
setLen(value, 0) # reuse memory
if i < data.len and data[i] == '=':
inc(i) # skip '='
i = parseData(data, i, value)
when defined(nimLegacyParseQueryStrict):
i = parseData(data, i, value, '=')
else:
i = parseData(data, i, value, '&')
yield (name.TaintedString, value.TaintedString)
if i < data.len:
if data[i] == '&': inc(i)
else:
uriParseError("'&' expected at index '$#' for '$#'" % [$i, data])
when defined(nimLegacyParseQueryStrict):
if data[i] != '&':
uriParseError("'&' expected at index '$#' for '$#'" % [$i, data])
inc(i)
func parseAuthority(authority: string, result: var Uri) =
var i = 0

View File

@@ -287,8 +287,7 @@ template main() =
block: # decodeQuery
doAssert toSeq(decodeQuery("a=1&b=0")) == @[("a", "1"), ("b", "0")]
doAssertRaises(UriParseError):
discard toSeq(decodeQuery("a=1&b=2c=6"))
doAssert toSeq(decodeQuery("a=1&b=2c=6")) == @[("a", "1"), ("b", "2c=6")]
static: main()
main()