system/unicode: check for buffer overflows; refs #5284

This commit is contained in:
Araq
2017-02-08 15:22:36 +01:00
parent 346ea6d171
commit 3caf108425
2 changed files with 78 additions and 43 deletions

View File

@@ -49,6 +49,8 @@ proc runeLenAt*(s: string, i: Natural): int =
elif ord(s[i]) shr 1 == 0b1111110: result = 6
else: result = 1
const replRune = Rune(0xFFFD)
template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
## Returns the Unicode character ``s[i]`` in ``result``. If ``doInc == true``,
## ``i`` is incremented by the number of bytes that have been processed.
@@ -58,49 +60,69 @@ template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
when doInc: inc(i)
elif ord(s[i]) shr 5 == 0b110:
# assert(ord(s[i+1]) shr 6 == 0b10)
result = Rune((ord(s[i]) and (ones(5))) shl 6 or
(ord(s[i+1]) and ones(6)))
when doInc: inc(i, 2)
if i <= s.len - 2:
result = Rune((ord(s[i]) and (ones(5))) shl 6 or
(ord(s[i+1]) and ones(6)))
when doInc: inc(i, 2)
else:
result = replRune
when doInc: inc(i)
elif ord(s[i]) shr 4 == 0b1110:
# assert(ord(s[i+1]) shr 6 == 0b10)
# assert(ord(s[i+2]) shr 6 == 0b10)
result = Rune((ord(s[i]) and ones(4)) shl 12 or
(ord(s[i+1]) and ones(6)) shl 6 or
(ord(s[i+2]) and ones(6)))
when doInc: inc(i, 3)
if i <= s.len - 3:
result = Rune((ord(s[i]) and ones(4)) shl 12 or
(ord(s[i+1]) and ones(6)) shl 6 or
(ord(s[i+2]) and ones(6)))
when doInc: inc(i, 3)
else:
result = replRune
when doInc: inc(i)
elif ord(s[i]) shr 3 == 0b11110:
# assert(ord(s[i+1]) shr 6 == 0b10)
# assert(ord(s[i+2]) shr 6 == 0b10)
# assert(ord(s[i+3]) shr 6 == 0b10)
result = Rune((ord(s[i]) and ones(3)) shl 18 or
(ord(s[i+1]) and ones(6)) shl 12 or
(ord(s[i+2]) and ones(6)) shl 6 or
(ord(s[i+3]) and ones(6)))
when doInc: inc(i, 4)
if i <= s.len - 4:
result = Rune((ord(s[i]) and ones(3)) shl 18 or
(ord(s[i+1]) and ones(6)) shl 12 or
(ord(s[i+2]) and ones(6)) shl 6 or
(ord(s[i+3]) and ones(6)))
when doInc: inc(i, 4)
else:
result = replRune
when doInc: inc(i)
elif ord(s[i]) shr 2 == 0b111110:
# assert(ord(s[i+1]) shr 6 == 0b10)
# assert(ord(s[i+2]) shr 6 == 0b10)
# assert(ord(s[i+3]) shr 6 == 0b10)
# assert(ord(s[i+4]) shr 6 == 0b10)
result = Rune((ord(s[i]) and ones(2)) shl 24 or
(ord(s[i+1]) and ones(6)) shl 18 or
(ord(s[i+2]) and ones(6)) shl 12 or
(ord(s[i+3]) and ones(6)) shl 6 or
(ord(s[i+4]) and ones(6)))
when doInc: inc(i, 5)
if i <= s.len - 5:
result = Rune((ord(s[i]) and ones(2)) shl 24 or
(ord(s[i+1]) and ones(6)) shl 18 or
(ord(s[i+2]) and ones(6)) shl 12 or
(ord(s[i+3]) and ones(6)) shl 6 or
(ord(s[i+4]) and ones(6)))
when doInc: inc(i, 5)
else:
result = replRune
when doInc: inc(i)
elif ord(s[i]) shr 1 == 0b1111110:
# assert(ord(s[i+1]) shr 6 == 0b10)
# assert(ord(s[i+2]) shr 6 == 0b10)
# assert(ord(s[i+3]) shr 6 == 0b10)
# assert(ord(s[i+4]) shr 6 == 0b10)
# assert(ord(s[i+5]) shr 6 == 0b10)
result = Rune((ord(s[i]) and ones(1)) shl 30 or
(ord(s[i+1]) and ones(6)) shl 24 or
(ord(s[i+2]) and ones(6)) shl 18 or
(ord(s[i+3]) and ones(6)) shl 12 or
(ord(s[i+4]) and ones(6)) shl 6 or
(ord(s[i+5]) and ones(6)))
when doInc: inc(i, 6)
if i <= s.len - 6:
result = Rune((ord(s[i]) and ones(1)) shl 30 or
(ord(s[i+1]) and ones(6)) shl 24 or
(ord(s[i+2]) and ones(6)) shl 18 or
(ord(s[i+3]) and ones(6)) shl 12 or
(ord(s[i+4]) and ones(6)) shl 6 or
(ord(s[i+5]) and ones(6)))
when doInc: inc(i, 6)
else:
result = replRune
when doInc: inc(i)
else:
result = Rune(ord(s[i]))
when doInc: inc(i)

View File

@@ -38,10 +38,11 @@ const
UNI_SUR_HIGH_END = 0xDBFF
UNI_SUR_LOW_START = 0xDC00
UNI_SUR_LOW_END = 0xDFFF
UNI_REPL = 0xFFFD
template ones(n: untyped): untyped = ((1 shl n)-1)
template fastRuneAt(s: cstring, i: int, result: untyped, doInc = true) =
template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) =
## Returns the Unicode character ``s[i]`` in ``result``. If ``doInc == true``,
## ``i`` is incremented by the number of bytes that have been processed.
bind ones
@@ -51,24 +52,36 @@ template fastRuneAt(s: cstring, i: int, result: untyped, doInc = true) =
when doInc: inc(i)
elif ord(s[i]) shr 5 == 0b110:
#assert(ord(s[i+1]) shr 6 == 0b10)
result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
when doInc: inc(i, 2)
if i <= L - 2:
result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
when doInc: inc(i, 2)
else:
result = UNI_REPL
when doInc: inc(i)
elif ord(s[i]) shr 4 == 0b1110:
#assert(ord(s[i+1]) shr 6 == 0b10)
#assert(ord(s[i+2]) shr 6 == 0b10)
result = (ord(s[i]) and ones(4)) shl 12 or
(ord(s[i+1]) and ones(6)) shl 6 or
(ord(s[i+2]) and ones(6))
when doInc: inc(i, 3)
if i <= L - 3:
#assert(ord(s[i+1]) shr 6 == 0b10)
#assert(ord(s[i+2]) shr 6 == 0b10)
result = (ord(s[i]) and ones(4)) shl 12 or
(ord(s[i+1]) and ones(6)) shl 6 or
(ord(s[i+2]) and ones(6))
when doInc: inc(i, 3)
else:
result = UNI_REPL
when doInc: inc(i)
elif ord(s[i]) shr 3 == 0b11110:
#assert(ord(s[i+1]) shr 6 == 0b10)
#assert(ord(s[i+2]) shr 6 == 0b10)
#assert(ord(s[i+3]) shr 6 == 0b10)
result = (ord(s[i]) and ones(3)) shl 18 or
(ord(s[i+1]) and ones(6)) shl 12 or
(ord(s[i+2]) and ones(6)) shl 6 or
(ord(s[i+3]) and ones(6))
when doInc: inc(i, 4)
if i <= L - 4:
#assert(ord(s[i+1]) shr 6 == 0b10)
#assert(ord(s[i+2]) shr 6 == 0b10)
#assert(ord(s[i+3]) shr 6 == 0b10)
result = (ord(s[i]) and ones(3)) shl 18 or
(ord(s[i+1]) and ones(6)) shl 12 or
(ord(s[i+2]) and ones(6)) shl 6 or
(ord(s[i+3]) and ones(6))
when doInc: inc(i, 4)
else:
result = UNI_REPL
when doInc: inc(i)
else:
result = 0xFFFD
when doInc: inc(i)
@@ -78,7 +91,7 @@ iterator runes(s: cstring, L: int): int =
i = 0
result: int
while i < L:
fastRuneAt(s, i, result, true)
fastRuneAt(s, i, L, result, true)
yield result
proc newWideCString*(source: cstring, L: int): WideCString =