mirror of
https://github.com/nim-lang/Nim.git
synced 2026-01-07 13:33:22 +00:00
system/unicode: check for buffer overflows; refs #5284
This commit is contained in:
@@ -49,6 +49,8 @@ proc runeLenAt*(s: string, i: Natural): int =
|
||||
elif ord(s[i]) shr 1 == 0b1111110: result = 6
|
||||
else: result = 1
|
||||
|
||||
# U+FFFD, the Unicode replacement character. ``fastRuneAt`` stores this in
# ``result`` when the lead byte announces a multi-byte sequence that would
# extend past the end of the string.
const replRune = Rune(0xFFFD)
|
||||
|
||||
template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
|
||||
## Returns the Unicode character ``s[i]`` in ``result``. If ``doInc == true``
|
||||
## ``i`` is incremented by the number of bytes that have been processed.
|
||||
@@ -58,49 +60,69 @@ template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
|
||||
when doInc: inc(i)
|
||||
elif ord(s[i]) shr 5 == 0b110:
|
||||
# assert(ord(s[i+1]) shr 6 == 0b10)
|
||||
result = Rune((ord(s[i]) and (ones(5))) shl 6 or
|
||||
(ord(s[i+1]) and ones(6)))
|
||||
when doInc: inc(i, 2)
|
||||
if i <= s.len - 2:
|
||||
result = Rune((ord(s[i]) and (ones(5))) shl 6 or
|
||||
(ord(s[i+1]) and ones(6)))
|
||||
when doInc: inc(i, 2)
|
||||
else:
|
||||
result = replRune
|
||||
when doInc: inc(i)
|
||||
elif ord(s[i]) shr 4 == 0b1110:
|
||||
# assert(ord(s[i+1]) shr 6 == 0b10)
|
||||
# assert(ord(s[i+2]) shr 6 == 0b10)
|
||||
result = Rune((ord(s[i]) and ones(4)) shl 12 or
|
||||
(ord(s[i+1]) and ones(6)) shl 6 or
|
||||
(ord(s[i+2]) and ones(6)))
|
||||
when doInc: inc(i, 3)
|
||||
if i <= s.len - 3:
|
||||
result = Rune((ord(s[i]) and ones(4)) shl 12 or
|
||||
(ord(s[i+1]) and ones(6)) shl 6 or
|
||||
(ord(s[i+2]) and ones(6)))
|
||||
when doInc: inc(i, 3)
|
||||
else:
|
||||
result = replRune
|
||||
when doInc: inc(i)
|
||||
elif ord(s[i]) shr 3 == 0b11110:
|
||||
# assert(ord(s[i+1]) shr 6 == 0b10)
|
||||
# assert(ord(s[i+2]) shr 6 == 0b10)
|
||||
# assert(ord(s[i+3]) shr 6 == 0b10)
|
||||
result = Rune((ord(s[i]) and ones(3)) shl 18 or
|
||||
(ord(s[i+1]) and ones(6)) shl 12 or
|
||||
(ord(s[i+2]) and ones(6)) shl 6 or
|
||||
(ord(s[i+3]) and ones(6)))
|
||||
when doInc: inc(i, 4)
|
||||
if i <= s.len - 4:
|
||||
result = Rune((ord(s[i]) and ones(3)) shl 18 or
|
||||
(ord(s[i+1]) and ones(6)) shl 12 or
|
||||
(ord(s[i+2]) and ones(6)) shl 6 or
|
||||
(ord(s[i+3]) and ones(6)))
|
||||
when doInc: inc(i, 4)
|
||||
else:
|
||||
result = replRune
|
||||
when doInc: inc(i)
|
||||
elif ord(s[i]) shr 2 == 0b111110:
|
||||
# assert(ord(s[i+1]) shr 6 == 0b10)
|
||||
# assert(ord(s[i+2]) shr 6 == 0b10)
|
||||
# assert(ord(s[i+3]) shr 6 == 0b10)
|
||||
# assert(ord(s[i+4]) shr 6 == 0b10)
|
||||
result = Rune((ord(s[i]) and ones(2)) shl 24 or
|
||||
(ord(s[i+1]) and ones(6)) shl 18 or
|
||||
(ord(s[i+2]) and ones(6)) shl 12 or
|
||||
(ord(s[i+3]) and ones(6)) shl 6 or
|
||||
(ord(s[i+4]) and ones(6)))
|
||||
when doInc: inc(i, 5)
|
||||
if i <= s.len - 5:
|
||||
result = Rune((ord(s[i]) and ones(2)) shl 24 or
|
||||
(ord(s[i+1]) and ones(6)) shl 18 or
|
||||
(ord(s[i+2]) and ones(6)) shl 12 or
|
||||
(ord(s[i+3]) and ones(6)) shl 6 or
|
||||
(ord(s[i+4]) and ones(6)))
|
||||
when doInc: inc(i, 5)
|
||||
else:
|
||||
result = replRune
|
||||
when doInc: inc(i)
|
||||
elif ord(s[i]) shr 1 == 0b1111110:
|
||||
# assert(ord(s[i+1]) shr 6 == 0b10)
|
||||
# assert(ord(s[i+2]) shr 6 == 0b10)
|
||||
# assert(ord(s[i+3]) shr 6 == 0b10)
|
||||
# assert(ord(s[i+4]) shr 6 == 0b10)
|
||||
# assert(ord(s[i+5]) shr 6 == 0b10)
|
||||
result = Rune((ord(s[i]) and ones(1)) shl 30 or
|
||||
(ord(s[i+1]) and ones(6)) shl 24 or
|
||||
(ord(s[i+2]) and ones(6)) shl 18 or
|
||||
(ord(s[i+3]) and ones(6)) shl 12 or
|
||||
(ord(s[i+4]) and ones(6)) shl 6 or
|
||||
(ord(s[i+5]) and ones(6)))
|
||||
when doInc: inc(i, 6)
|
||||
if i <= s.len - 6:
|
||||
result = Rune((ord(s[i]) and ones(1)) shl 30 or
|
||||
(ord(s[i+1]) and ones(6)) shl 24 or
|
||||
(ord(s[i+2]) and ones(6)) shl 18 or
|
||||
(ord(s[i+3]) and ones(6)) shl 12 or
|
||||
(ord(s[i+4]) and ones(6)) shl 6 or
|
||||
(ord(s[i+5]) and ones(6)))
|
||||
when doInc: inc(i, 6)
|
||||
else:
|
||||
result = replRune
|
||||
when doInc: inc(i)
|
||||
else:
|
||||
result = Rune(ord(s[i]))
|
||||
when doInc: inc(i)
|
||||
|
||||
@@ -38,10 +38,11 @@ const
|
||||
UNI_SUR_HIGH_END = 0xDBFF
|
||||
UNI_SUR_LOW_START = 0xDC00
|
||||
UNI_SUR_LOW_END = 0xDFFF
|
||||
UNI_REPL = 0xFFFD
|
||||
|
||||
template ones(n: untyped): untyped =
  ## Expands to a mask whose ``n`` least significant bits are set,
  ## e.g. ``ones(6)`` is ``0b111111``.
  (1 shl n) - 1
|
||||
|
||||
template fastRuneAt(s: cstring, i: int, result: untyped, doInc = true) =
|
||||
template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) =
|
||||
## Returns the unicode character ``s[i]`` in `result`. If ``doInc == true``
|
||||
## `i` is incremented by the number of bytes that have been processed.
|
||||
bind ones
|
||||
@@ -51,24 +52,36 @@ template fastRuneAt(s: cstring, i: int, result: untyped, doInc = true) =
|
||||
when doInc: inc(i)
|
||||
elif ord(s[i]) shr 5 == 0b110:
|
||||
#assert(ord(s[i+1]) shr 6 == 0b10)
|
||||
result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
|
||||
when doInc: inc(i, 2)
|
||||
if i <= L - 2:
|
||||
result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
|
||||
when doInc: inc(i, 2)
|
||||
else:
|
||||
result = UNI_REPL
|
||||
when doInc: inc(i)
|
||||
elif ord(s[i]) shr 4 == 0b1110:
|
||||
#assert(ord(s[i+1]) shr 6 == 0b10)
|
||||
#assert(ord(s[i+2]) shr 6 == 0b10)
|
||||
result = (ord(s[i]) and ones(4)) shl 12 or
|
||||
(ord(s[i+1]) and ones(6)) shl 6 or
|
||||
(ord(s[i+2]) and ones(6))
|
||||
when doInc: inc(i, 3)
|
||||
if i <= L - 3:
|
||||
#assert(ord(s[i+1]) shr 6 == 0b10)
|
||||
#assert(ord(s[i+2]) shr 6 == 0b10)
|
||||
result = (ord(s[i]) and ones(4)) shl 12 or
|
||||
(ord(s[i+1]) and ones(6)) shl 6 or
|
||||
(ord(s[i+2]) and ones(6))
|
||||
when doInc: inc(i, 3)
|
||||
else:
|
||||
result = UNI_REPL
|
||||
when doInc: inc(i)
|
||||
elif ord(s[i]) shr 3 == 0b11110:
|
||||
#assert(ord(s[i+1]) shr 6 == 0b10)
|
||||
#assert(ord(s[i+2]) shr 6 == 0b10)
|
||||
#assert(ord(s[i+3]) shr 6 == 0b10)
|
||||
result = (ord(s[i]) and ones(3)) shl 18 or
|
||||
(ord(s[i+1]) and ones(6)) shl 12 or
|
||||
(ord(s[i+2]) and ones(6)) shl 6 or
|
||||
(ord(s[i+3]) and ones(6))
|
||||
when doInc: inc(i, 4)
|
||||
if i <= L - 4:
|
||||
#assert(ord(s[i+1]) shr 6 == 0b10)
|
||||
#assert(ord(s[i+2]) shr 6 == 0b10)
|
||||
#assert(ord(s[i+3]) shr 6 == 0b10)
|
||||
result = (ord(s[i]) and ones(3)) shl 18 or
|
||||
(ord(s[i+1]) and ones(6)) shl 12 or
|
||||
(ord(s[i+2]) and ones(6)) shl 6 or
|
||||
(ord(s[i+3]) and ones(6))
|
||||
when doInc: inc(i, 4)
|
||||
else:
|
||||
result = UNI_REPL
|
||||
when doInc: inc(i)
|
||||
else:
|
||||
result = 0xFFFD
|
||||
when doInc: inc(i)
|
||||
@@ -78,7 +91,7 @@ iterator runes(s: cstring, L: int): int =
|
||||
i = 0
|
||||
result: int
|
||||
while i < L:
|
||||
fastRuneAt(s, i, result, true)
|
||||
fastRuneAt(s, i, L, result, true)
|
||||
yield result
|
||||
|
||||
proc newWideCString*(source: cstring, L: int): WideCString =
|
||||
|
||||
Reference in New Issue
Block a user