Deprecate 'c', 'C' prefix for octal literals, fixes #8082 (#8178)

* deprecate `0c`, `0C` prefix for octal literals

Deprecates the previously allowed syntax of `0c` and `0C` as a prefix for
octal literals to bring the literals in line with the behavior of
`parseOct` from parseutils.

* add `msgKind` arg to `lexMessageLitNum` for deprecation messages

* change literal tests to check all valid integer literals

Also adds the `tinvalidintegerliteral3` test to check that the `0O` prefix
is (still) rejected as invalid.
This commit is contained in:
Vindaar
2018-07-03 01:56:36 +02:00
committed by Andreas Rumpf
parent 70cf286a22
commit 681d8e0749
7 changed files with 62 additions and 33 deletions

View File

@@ -20,6 +20,8 @@
- The parser now warns about inconsistent spacing around binary operators as
these can easily be confused with unary operators. This warning will likely
become an error in the future.
- The ``0c`` and ``0C`` prefixes for octal literals are now deprecated to
bring the language in line with the standard library (e.g. ``parseOct``).
#### Breaking changes in the standard library

View File

@@ -359,9 +359,8 @@ proc getNumber(L: var TLexer, result: var TToken) =
inc(pos)
L.bufpos = pos
proc lexMessageLitNum(L: var TLexer, msg: string, startpos: int) =
proc lexMessageLitNum(L: var TLexer, msg: string, startpos: int, msgKind = errGenerated) =
# Used to get slightly human friendlier err messages.
# Note: the erroneous 'O' char in the character set is intentional
const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O',
'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'}
var msgPos = L.bufpos
@@ -380,7 +379,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
add(t.literal, L.buf[L.bufpos])
matchChars(L, t, {'0'..'9'})
L.bufpos = msgPos
lexMessage(L, errGenerated, msg % t.literal)
lexMessage(L, msgKind, msg % t.literal)
var
startpos, endpos: int
@@ -388,7 +387,8 @@ proc getNumber(L: var TLexer, result: var TToken) =
isBase10 = true
numDigits = 0
const
baseCodeChars = {'X', 'x', 'o', 'c', 'C', 'b', 'B'}
# 'c', 'C' is deprecated
baseCodeChars = {'X', 'x', 'o', 'b', 'B', 'c', 'C'}
literalishChars = baseCodeChars + {'A'..'F', 'a'..'f', '0'..'9', '_', '\''}
floatTypes = {tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit}
result.tokType = tkIntLit # int literal until we know better
@@ -398,17 +398,27 @@ proc getNumber(L: var TLexer, result: var TToken) =
tokenBegin(result, startPos)
# First stage: find out base, make verifications, build token literal string
if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'O'}:
# {'c', 'C'} is added for deprecation reasons to provide a clear error message
if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'c', 'C', 'O'}:
isBase10 = false
eatChar(L, result, '0')
case L.buf[L.bufpos]
of 'c', 'C':
lexMessageLitNum(L,
"$1 will soon be invalid for oct literals; Use '0o' " &
"for octals. 'c', 'C' prefix",
startpos,
warnDeprecated)
eatChar(L, result, 'c')
numDigits = matchUnderscoreChars(L, result, {'0'..'7'})
of 'O':
lexMessageLitNum(L, "$1 is not a valid number; did you mean octal? Then use one of '0o', '0c' or '0C'.", startpos)
lexMessageLitNum(L, "$1 is an invalid int literal; For octal literals " &
"use the '0o' prefix.", startpos)
of 'x', 'X':
eatChar(L, result, 'x')
numDigits = matchUnderscoreChars(L, result, {'0'..'9', 'a'..'f', 'A'..'F'})
of 'o', 'c', 'C':
eatChar(L, result, 'c')
of 'o':
eatChar(L, result, 'o')
numDigits = matchUnderscoreChars(L, result, {'0'..'7'})
of 'b', 'B':
eatChar(L, result, 'b')
@@ -511,6 +521,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
if L.buf[pos] != '_':
xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
inc(pos)
# 'c', 'C' is deprecated
of 'o', 'c', 'C':
result.base = base8
while pos < endpos:

View File

@@ -413,7 +413,7 @@ Numerical constants are of a single type and have the form::
bindigit = '0'..'1'
HEX_LIT = '0' ('x' | 'X' ) hexdigit ( ['_'] hexdigit )*
DEC_LIT = digit ( ['_'] digit )*
OCT_LIT = '0' ('o' | 'c' | 'C') octdigit ( ['_'] octdigit )*
OCT_LIT = '0' 'o' octdigit ( ['_'] octdigit )*
BIN_LIT = '0' ('b' | 'B' ) bindigit ( ['_'] bindigit )*
INT_LIT = HEX_LIT
@@ -444,7 +444,7 @@ Numerical constants are of a single type and have the form::
As can be seen in the productions, numerical constants can contain underscores
for readability. Integer and floating point literals may be given in decimal (no
prefix), binary (prefix ``0b``), octal (prefix ``0o`` or ``0c``) and hexadecimal
prefix), binary (prefix ``0b``), octal (prefix ``0o``) and hexadecimal
(prefix ``0x``) notation.
There exists a literal for each numerical type that is

View File

@@ -36,26 +36,26 @@
# platforms - where do they come from and why are they here?
when false:
const
C_IRUSR = 0c000400 ## Read by owner.
C_IWUSR = 0c000200 ## Write by owner.
C_IXUSR = 0c000100 ## Execute by owner.
C_IRGRP = 0c000040 ## Read by group.
C_IWGRP = 0c000020 ## Write by group.
C_IXGRP = 0c000010 ## Execute by group.
C_IROTH = 0c000004 ## Read by others.
C_IWOTH = 0c000002 ## Write by others.
C_IXOTH = 0c000001 ## Execute by others.
C_ISUID = 0c004000 ## Set user ID.
C_ISGID = 0c002000 ## Set group ID.
C_ISVTX = 0c001000 ## On directories, restricted deletion flag.
C_ISDIR = 0c040000 ## Directory.
C_ISFIFO = 0c010000 ##FIFO.
C_ISREG = 0c100000 ## Regular file.
C_ISBLK = 0c060000 ## Block special.
C_ISCHR = 0c020000 ## Character special.
C_ISCTG = 0c110000 ## Reserved.
C_ISLNK = 0c120000 ## Symbolic link.</p>
C_ISSOCK = 0c140000 ## Socket.
C_IRUSR = 0o000400 ## Read by owner.
C_IWUSR = 0o000200 ## Write by owner.
C_IXUSR = 0o000100 ## Execute by owner.
C_IRGRP = 0o000040 ## Read by group.
C_IWGRP = 0o000020 ## Write by group.
C_IXGRP = 0o000010 ## Execute by group.
C_IROTH = 0o000004 ## Read by others.
C_IWOTH = 0o000002 ## Write by others.
C_IXOTH = 0o000001 ## Execute by others.
C_ISUID = 0o004000 ## Set user ID.
C_ISGID = 0o002000 ## Set group ID.
C_ISVTX = 0o001000 ## On directories, restricted deletion flag.
C_ISDIR = 0o040000 ## Directory.
C_ISFIFO = 0o010000 ##FIFO.
C_ISREG = 0o100000 ## Regular file.
C_ISBLK = 0o060000 ## Block special.
C_ISCHR = 0o020000 ## Character special.
C_ISCTG = 0o110000 ## Reserved.
C_ISLNK = 0o120000 ## Symbolic link.</p>
C_ISSOCK = 0o140000 ## Socket.
const
MM_NULLLBL* = nil

View File

@@ -0,0 +1,9 @@
# Test that every accepted integer literal prefix yields the expected value.
# Binary: both the `0b` and `0B` prefixes are accepted.
assert 0b10 == 2
assert 0B10 == 2
# Hexadecimal: both the `0x` and `0X` prefixes are accepted.
assert 0x10 == 16
assert 0X10 == 16
# Octal: `0o` is the supported prefix.
assert 0o10 == 8
# The `0c` and `0C` octal prefixes are deprecated (the lexer emits a
# deprecation warning) but must still be parsed as octal:
assert 0c10 == 8
assert 0C10 == 8

View File

@@ -0,0 +1,7 @@
discard """
file: "tinvalidintegerliteral3.nim"
line: 7
errormsg: "0O5 is an invalid int literal; For octal literals use the '0o' prefix."
"""
echo 0O5

View File

@@ -9,7 +9,7 @@ discard """
# 8 bit
let ref1 = 128'u8 shr 7
let hex1 = 0x80'u8 shr 7
let oct1 = 0c200'u8 shr 7
let oct1 = 0o200'u8 shr 7
let dig1 = 0b10000000'u8 shr 7
doAssert(ref1 == 1)
@@ -20,7 +20,7 @@ doAssert(ref1 == dig1)
# 16 bit
let ref2 = 32768'u16 shr 15
let hex2 = 0x8000'u16 shr 15
let oct2 = 0c100000'u16 shr 15
let oct2 = 0o100000'u16 shr 15
let dig2 = 0b1000000000000000'u16 shr 15
doAssert(ref2 == 1)
@@ -31,7 +31,7 @@ doAssert(ref2 == dig2)
# 32 bit
let ref3 = 2147483648'u32 shr 31
let hex3 = 0x80000000'u32 shr 31
let oct3 = 0c20000000000'u32 shr 31
let oct3 = 0o20000000000'u32 shr 31
let dig3 = 0b10000000000000000000000000000000'u32 shr 31
doAssert(ref3 == 1)