Merge pull request #2890 from ozra/fix-1179-unsigned-number-literals

Fix 1179 unsigned number literals
This commit is contained in:
Andreas Rumpf
2015-06-13 19:49:32 +02:00
2 changed files with 153 additions and 48 deletions

View File

@@ -262,14 +262,11 @@ template eatChar(L: var TLexer, t: var TToken) =
add(t.literal, L.buf[L.bufpos])
inc(L.bufpos)
proc getNumber(L: var TLexer): TToken =
var
startpos, endpos: int
xi: BiggestInt
const literalishChars = { 'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'c',
'C', 'b', 'B', '_', '.', '\''}
const literalishCharsNoDot = literalishChars - {'.'}
proc getNumber(L: var TLexer): TToken =
proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) =
var pos = L.bufpos # use registers for pos, buf
var buf = L.buf
@@ -318,14 +315,22 @@ proc getNumber(L: var TLexer): TToken =
L.bufpos = msgPos
lexMessage(L, msg, t.literal)
var
startpos, endpos: int
xi: BiggestInt
isBase10 = true
const
baseCodeChars = {'X', 'x', 'o', 'c', 'C', 'b', 'B'}
literalishChars = baseCodeChars + {'A'..'F', 'a'..'f', '0'..'9', '_', '\''}
floatTypes = {tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit}
result.tokType = tkIntLit # int literal until we know better
result.literal = ""
result.base = base10
startpos = L.bufpos
var isAFloatLiteral = false
# First stage: find out base, make verifications, build token literal string
if L.buf[L.bufpos] == '0' and
L.buf[L.bufpos + 1] in {'X', 'x', 'o', 'O', 'c', 'C', 'b', 'B'}:
if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'O'}:
isBase10 = false
eatChar(L, result, '0')
case L.buf[L.bufpos]
of 'O':
@@ -344,21 +349,23 @@ proc getNumber(L: var TLexer): TToken =
else:
matchUnderscoreChars(L, result, {'0'..'9'})
if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}):
isAFloatLiteral = true
result.tokType = tkFloat64Lit
eatChar(L, result, '.')
matchUnderscoreChars(L, result, {'0'..'9'})
if L.buf[L.bufpos] in {'e', 'E'}:
isAFloatLiteral = true
result.tokType = tkFloat64Lit
eatChar(L, result, 'e')
if L.buf[L.bufpos] in {'+', '-'}:
eatChar(L, result)
matchUnderscoreChars(L, result, {'0'..'9'})
endpos = L.bufpos
# Second stage, find out if there's a datatype postfix and handle it
# Second stage, find out if there's a datatype suffix and handle it
var postPos = endpos
if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
if L.buf[postPos] == '\'':
if L.buf[postPos] == '\'':
inc(postPos)
case L.buf[postPos]
of 'f', 'F':
inc(postPos)
@@ -410,21 +417,23 @@ proc getNumber(L: var TLexer): TToken =
inc(postPos)
else:
result.tokType = tkUIntLit
else:
else:
lexMessageLitNum(L, errInvalidNumber, startpos)
# Is there still a literalish char awaiting? Then it's an error!
if L.buf[postPos] in literalishCharsNoDot or
if L.buf[postPos] in literalishChars or
(L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}):
lexMessageLitNum(L, errInvalidNumber, startpos)
# Third stage, extract actual number
L.bufpos = startpos # restore position
var pos: int = startpos
try:
if (L.buf[pos] == '0') and
(L.buf[pos + 1] in {'x', 'X', 'b', 'B', 'o', 'O', 'c', 'C'}):
if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
inc(pos, 2)
xi = 0 # it is a base prefix
case L.buf[pos - 1] # now look at the optional type suffix:
case L.buf[pos - 1]
of 'b', 'B':
result.base = base2
while pos < endpos:
@@ -452,48 +461,76 @@ proc getNumber(L: var TLexer): TToken =
of 'A'..'F':
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
inc(pos)
else:
else:
break
else:
else:
internalError(getLineInfo(L), "getNumber")
case result.tokType
of tkIntLit, tkInt64Lit: result.iNumber = xi
of tkInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi))))
of tkInt16Lit: result.iNumber = BiggestInt(toU16(int(xi)))
of tkInt32Lit: result.iNumber = BiggestInt(toU32(xi))
of tkInt16Lit: result.iNumber = BiggestInt(int16(toU16(int(xi))))
of tkInt32Lit: result.iNumber = BiggestInt(int32(toU32(int64(xi))))
of tkUIntLit, tkUInt64Lit: result.iNumber = xi
of tkUInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi))))
of tkUInt16Lit: result.iNumber = BiggestInt(toU16(int(xi)))
of tkUInt32Lit: result.iNumber = BiggestInt(toU32(xi))
of tkUInt8Lit: result.iNumber = BiggestInt(uint8(toU8(int(xi))))
of tkUInt16Lit: result.iNumber = BiggestInt(uint16(toU16(int(xi))))
of tkUInt32Lit: result.iNumber = BiggestInt(uint32(toU32(int64(xi))))
of tkFloat32Lit:
result.fNumber = (cast[PFloat32](addr(xi)))[]
# note: this code is endian neutral!
# XXX: Test this on big endian machine!
of tkFloat64Lit: result.fNumber = (cast[PFloat64](addr(xi)))[]
else: internalError(getLineInfo(L), "getNumber")
elif isAFloatLiteral or (result.tokType == tkFloat32Lit) or
(result.tokType == tkFloat64Lit):
result.fNumber = parseFloat(result.literal)
if result.tokType == tkIntLit: result.tokType = tkFloatLit
elif result.tokType == tkUint64Lit:
xi = 0
let len = unsafeParseUInt(result.literal, xi)
if len != result.literal.len or len == 0:
raise newException(ValueError, "invalid integer: " & $xi)
result.iNumber = xi
else:
result.iNumber = parseBiggestInt(result.literal)
if (result.iNumber < low(int32)) or (result.iNumber > high(int32)):
if result.tokType == tkIntLit:
result.tokType = tkInt64Lit
elif result.tokType in {tkInt8Lit, tkInt16Lit, tkInt32Lit}:
# Bounds checks. Non decimal literals are allowed to overflow the range of
# the datatype as long as their pattern don't overflow _bitwise_, hence
# below checks of signed sizes against uint*.high is deliberate:
# (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
if result.tokType notin floatTypes:
let outOfRange = case result.tokType:
of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
of tkInt8Lit: (xi > BiggestInt(uint8.high))
of tkInt16Lit: (xi > BiggestInt(uint16.high))
of tkInt32Lit: (xi > BiggestInt(uint32.high))
else: false
if outOfRange:
echo "out of range num: ", result.iNumber, " vs ", xi
lexMessageLitNum(L, errNumberOutOfRange, startpos)
elif result.tokType == tkInt8Lit and
(result.iNumber < int8.low or result.iNumber > int8.high):
lexMessageLitNum(L, errNumberOutOfRange, startpos)
elif result.tokType == tkInt16Lit and
(result.iNumber < int16.low or result.iNumber > int16.high):
lexMessageLitNum(L, errNumberOutOfRange, startpos)
else:
case result.tokType
of floatTypes:
result.fNumber = parseFloat(result.literal)
of tkUint64Lit:
xi = 0
let len = unsafeParseUInt(result.literal, xi)
if len != result.literal.len or len == 0:
raise newException(ValueError, "invalid integer: " & $xi)
result.iNumber = xi
else:
result.iNumber = parseBiggestInt(result.literal)
# Explicit bounds checks
let outOfRange = case result.tokType:
of tkInt8Lit: (result.iNumber < int8.low or result.iNumber > int8.high)
of tkUInt8Lit: (result.iNumber < BiggestInt(uint8.low) or
result.iNumber > BiggestInt(uint8.high))
of tkInt16Lit: (result.iNumber < int16.low or result.iNumber > int16.high)
of tkUInt16Lit: (result.iNumber < BiggestInt(uint16.low) or
result.iNumber > BiggestInt(uint16.high))
of tkInt32Lit: (result.iNumber < int32.low or result.iNumber > int32.high)
of tkUInt32Lit: (result.iNumber < BiggestInt(uint32.low) or
result.iNumber > BiggestInt(uint32.high))
else: false
if outOfRange: lexMessageLitNum(L, errNumberOutOfRange, startpos)
# Promote int literal to int64? Not always necessary, but more consistent
if result.tokType == tkIntLit:
if (result.iNumber < low(int32)) or (result.iNumber > high(int32)):
result.tokType = tkInt64Lit
except ValueError:
lexMessageLitNum(L, errInvalidNumber, startpos)
except OverflowError, RangeError:

View File

@@ -0,0 +1,68 @@
import unsigned
discard """
errormsg: "number 0x123'u8 out of valid range"
"""
# Bug #1179
# Unsigneds
# 8 bit
let ref1 = 128'u8 shr 7
let hex1 = 0x80'u8 shr 7
let oct1 = 0c200'u8 shr 7
let dig1 = 0b10000000'u8 shr 7
doAssert(ref1 == 1)
doAssert(ref1 == hex1)
doAssert(ref1 == oct1)
doAssert(ref1 == dig1)
# 16 bit
let ref2 = 32768'u16 shr 15
let hex2 = 0x8000'u16 shr 15
let oct2 = 0c100000'u16 shr 15
let dig2 = 0b1000000000000000'u16 shr 15
doAssert(ref2 == 1)
doAssert(ref2 == hex2)
doAssert(ref2 == oct2)
doAssert(ref2 == dig2)
# 32 bit
let ref3 = 2147483648'u32 shr 31
let hex3 = 0x80000000'u32 shr 31
let oct3 = 0c20000000000'u32 shr 31
let dig3 = 0b10000000000000000000000000000000'u32 shr 31
doAssert(ref3 == 1)
doAssert(ref3 == hex3)
doAssert(ref3 == oct3)
doAssert(ref3 == dig3)
# Below doesn't work for lexer stage errors...
# doAssert(compiles(0xFF'u8) == true)
# doAssert(compiles(0xFFF'u16) == true)
# doAssert(compiles(0x7FFF'i16) == true)
# doAssert(compiles(0x123'u8) == false)
# doAssert(compiles(0x123'i8) == false)
# doAssert(compiles(0x123123'u16) == false)
# doAssert(compiles(0x123123'i16) == false)
# Should compile #
let boundOkHex1 = 0xFF'u8
let boundOkHex2 = 0xFFFF'u16
let boundOkHex3 = 0x7FFF'i16
let boundOkHex4 = 0x80'i8
let boundOkHex5 = 0xFF'i8
let boundOkHex6 = 0xFFFF'i16
let boundOkHex7 = 0x7FFF'i16
# Should _not_ compile #
let boundBreakingHex1 = 0x123'u8
let boundBreakingHex2 = 0x123'i8
let boundBreakingHex3 = 0x123123'u16
let boundBreakingHex4 = 0x123123'i16