diff --git a/changelog.md b/changelog.md index bbe5261249..0943d78364 100644 --- a/changelog.md +++ b/changelog.md @@ -20,6 +20,8 @@ - The parser now warns about inconsistent spacing around binary operators as these can easily be confused with unary operators. This warning will likely become an error in the future. +- The ``'c'`` and ``'C'`` prefixes for octal literals are now deprecated to + bring the language in line with the standard library (e.g. ``parseOct``). #### Breaking changes in the standard library diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 375bf227dd..9727d16a7e 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -359,9 +359,8 @@ proc getNumber(L: var TLexer, result: var TToken) = inc(pos) L.bufpos = pos - proc lexMessageLitNum(L: var TLexer, msg: string, startpos: int) = + proc lexMessageLitNum(L: var TLexer, msg: string, startpos: int, msgKind = errGenerated) = # Used to get slightly human friendlier err messages. - # Note: the erroneous 'O' char in the character set is intentional const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O', 'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'} var msgPos = L.bufpos @@ -380,7 +379,7 @@ proc getNumber(L: var TLexer, result: var TToken) = add(t.literal, L.buf[L.bufpos]) matchChars(L, t, {'0'..'9'}) L.bufpos = msgPos - lexMessage(L, errGenerated, msg % t.literal) + lexMessage(L, msgKind, msg % t.literal) var startpos, endpos: int @@ -388,7 +387,8 @@ proc getNumber(L: var TLexer, result: var TToken) = isBase10 = true numDigits = 0 const - baseCodeChars = {'X', 'x', 'o', 'c', 'C', 'b', 'B'} + # 'c', 'C' is deprecated + baseCodeChars = {'X', 'x', 'o', 'b', 'B', 'c', 'C'} literalishChars = baseCodeChars + {'A'..'F', 'a'..'f', '0'..'9', '_', '\''} floatTypes = {tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit} result.tokType = tkIntLit # int literal until we know better @@ -398,17 +398,27 @@ proc getNumber(L: var TLexer, result: var TToken) = tokenBegin(result, startPos) # 
First stage: find out base, make verifications, build token literal string - if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'O'}: + # {'c', 'C'} is added for deprecation reasons to provide a clear error message + if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'c', 'C', 'O'}: isBase10 = false eatChar(L, result, '0') case L.buf[L.bufpos] + of 'c', 'C': + lexMessageLitNum(L, + "$1 will soon be invalid for oct literals; Use '0o' " & + "for octals. 'c', 'C' prefix", + startpos, + warnDeprecated) + eatChar(L, result, 'c') + numDigits = matchUnderscoreChars(L, result, {'0'..'7'}) of 'O': - lexMessageLitNum(L, "$1 is not a valid number; did you mean octal? Then use one of '0o', '0c' or '0C'.", startpos) + lexMessageLitNum(L, "$1 is an invalid int literal; For octal literals " & + "use the '0o' prefix.", startpos) of 'x', 'X': eatChar(L, result, 'x') numDigits = matchUnderscoreChars(L, result, {'0'..'9', 'a'..'f', 'A'..'F'}) - of 'o', 'c', 'C': - eatChar(L, result, 'c') + of 'o': + eatChar(L, result, 'o') numDigits = matchUnderscoreChars(L, result, {'0'..'7'}) of 'b', 'B': eatChar(L, result, 'b') @@ -511,6 +521,7 @@ proc getNumber(L: var TLexer, result: var TToken) = if L.buf[pos] != '_': xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0')) inc(pos) + # 'c', 'C' is deprecated of 'o', 'c', 'C': result.base = base8 while pos < endpos: diff --git a/doc/manual.rst b/doc/manual.rst index 28fbfb1990..5ca6da22e1 100644 --- a/doc/manual.rst +++ b/doc/manual.rst @@ -413,7 +413,7 @@ Numerical constants are of a single type and have the form:: bindigit = '0'..'1' HEX_LIT = '0' ('x' | 'X' ) hexdigit ( ['_'] hexdigit )* DEC_LIT = digit ( ['_'] digit )* - OCT_LIT = '0' ('o' | 'c' | 'C') octdigit ( ['_'] octdigit )* + OCT_LIT = '0' 'o' octdigit ( ['_'] octdigit )* BIN_LIT = '0' ('b' | 'B' ) bindigit ( ['_'] bindigit )* INT_LIT = HEX_LIT @@ -444,7 +444,7 @@ Numerical constants are of a single type and have the form:: As can be seen in the 
productions, numerical constants can contain underscores for readability. Integer and floating point literals may be given in decimal (no -prefix), binary (prefix ``0b``), octal (prefix ``0o`` or ``0c``) and hexadecimal +prefix), binary (prefix ``0b``), octal (prefix ``0o``) and hexadecimal (prefix ``0x``) notation. There exists a literal for each numerical type that is diff --git a/lib/posix/posix.nim b/lib/posix/posix.nim index 97b4124ec4..c230e65985 100644 --- a/lib/posix/posix.nim +++ b/lib/posix/posix.nim @@ -36,26 +36,26 @@ # platforms - where do they come from and why are they here? when false: const - C_IRUSR = 0c000400 ## Read by owner. - C_IWUSR = 0c000200 ## Write by owner. - C_IXUSR = 0c000100 ## Execute by owner. - C_IRGRP = 0c000040 ## Read by group. - C_IWGRP = 0c000020 ## Write by group. - C_IXGRP = 0c000010 ## Execute by group. - C_IROTH = 0c000004 ## Read by others. - C_IWOTH = 0c000002 ## Write by others. - C_IXOTH = 0c000001 ## Execute by others. - C_ISUID = 0c004000 ## Set user ID. - C_ISGID = 0c002000 ## Set group ID. - C_ISVTX = 0c001000 ## On directories, restricted deletion flag. - C_ISDIR = 0c040000 ## Directory. - C_ISFIFO = 0c010000 ##FIFO. - C_ISREG = 0c100000 ## Regular file. - C_ISBLK = 0c060000 ## Block special. - C_ISCHR = 0c020000 ## Character special. - C_ISCTG = 0c110000 ## Reserved. - C_ISLNK = 0c120000 ## Symbolic link.

- C_ISSOCK = 0c140000 ## Socket. + C_IRUSR = 0o000400 ## Read by owner. + C_IWUSR = 0o000200 ## Write by owner. + C_IXUSR = 0o000100 ## Execute by owner. + C_IRGRP = 0o000040 ## Read by group. + C_IWGRP = 0o000020 ## Write by group. + C_IXGRP = 0o000010 ## Execute by group. + C_IROTH = 0o000004 ## Read by others. + C_IWOTH = 0o000002 ## Write by others. + C_IXOTH = 0o000001 ## Execute by others. + C_ISUID = 0o004000 ## Set user ID. + C_ISGID = 0o002000 ## Set group ID. + C_ISVTX = 0o001000 ## On directories, restricted deletion flag. + C_ISDIR = 0o040000 ## Directory. + C_ISFIFO = 0o010000 ##FIFO. + C_ISREG = 0o100000 ## Regular file. + C_ISBLK = 0o060000 ## Block special. + C_ISCHR = 0o020000 ## Character special. + C_ISCTG = 0o110000 ## Reserved. + C_ISLNK = 0o120000 ## Symbolic link.

+ C_ISSOCK = 0o140000 ## Socket. const MM_NULLLBL* = nil diff --git a/tests/lexer/tintegerliterals.nim b/tests/lexer/tintegerliterals.nim new file mode 100644 index 0000000000..7420db144d --- /dev/null +++ b/tests/lexer/tintegerliterals.nim @@ -0,0 +1,9 @@ +# test the valid literals +assert 0b10 == 2 +assert 0B10 == 2 +assert 0x10 == 16 +assert 0X10 == 16 +assert 0o10 == 8 +# the following is deprecated: +assert 0c10 == 8 +assert 0C10 == 8 diff --git a/tests/lexer/tinvalidintegerliteral3.nim b/tests/lexer/tinvalidintegerliteral3.nim new file mode 100644 index 0000000000..9c2fe69df5 --- /dev/null +++ b/tests/lexer/tinvalidintegerliteral3.nim @@ -0,0 +1,7 @@ +discard """ + file: "tinvalidintegerliteral3.nim" + line: 7 + errormsg: "0O5 is an invalid int literal; For octal literals use the '0o' prefix." +""" + +echo 0O5 diff --git a/tests/misc/tunsignedmisc.nim b/tests/misc/tunsignedmisc.nim index fc02eee195..b2a3849cf6 100644 --- a/tests/misc/tunsignedmisc.nim +++ b/tests/misc/tunsignedmisc.nim @@ -9,7 +9,7 @@ discard """ # 8 bit let ref1 = 128'u8 shr 7 let hex1 = 0x80'u8 shr 7 -let oct1 = 0c200'u8 shr 7 +let oct1 = 0o200'u8 shr 7 let dig1 = 0b10000000'u8 shr 7 doAssert(ref1 == 1) @@ -20,7 +20,7 @@ doAssert(ref1 == dig1) # 16 bit let ref2 = 32768'u16 shr 15 let hex2 = 0x8000'u16 shr 15 -let oct2 = 0c100000'u16 shr 15 +let oct2 = 0o100000'u16 shr 15 let dig2 = 0b1000000000000000'u16 shr 15 doAssert(ref2 == 1) @@ -31,7 +31,7 @@ doAssert(ref2 == dig2) # 32 bit let ref3 = 2147483648'u32 shr 31 let hex3 = 0x80000000'u32 shr 31 -let oct3 = 0c20000000000'u32 shr 31 +let oct3 = 0o20000000000'u32 shr 31 let dig3 = 0b10000000000000000000000000000000'u32 shr 31 doAssert(ref3 == 1)