mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-29 01:14:41 +00:00
* make unary minus part of number literals, refs #17020 * fixes regression
This commit is contained in:
@@ -212,7 +212,7 @@
|
||||
|
||||
- `std/options` changed `$some(3)` to `"some(3)"` instead of `"Some(3)"`
|
||||
and `$none(int)` to `"none(int)"` instead of `"None[int]"`.
|
||||
|
||||
|
||||
- Added `algorithm.merge`.
|
||||
|
||||
|
||||
@@ -263,6 +263,8 @@
|
||||
|
||||
- `typedesc[Foo]` now renders as such instead of `type Foo` in compiler messages.
|
||||
|
||||
- The unary minus in `-1` is now part of the integer literal; `-1` is now parsed as a single token.
|
||||
This implies that edge cases like `-128'i8` finally work correctly.
|
||||
|
||||
|
||||
## Compiler changes
|
||||
|
||||
@@ -26,6 +26,7 @@ const
|
||||
SymStartChars*: set[char] = {'a'..'z', 'A'..'Z', '\x80'..'\xFF'}
|
||||
OpChars*: set[char] = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
|
||||
'|', '=', '%', '&', '$', '@', '~', ':'}
|
||||
UnaryMinusWhitelist = {' ', '\t', '\n', '\r', ',', ';', '(', '[', '{'}
|
||||
|
||||
# don't forget to update the 'highlite' module if these charsets should change
|
||||
|
||||
@@ -51,22 +52,22 @@ type
|
||||
tkVar = "var", tkWhen = "when", tkWhile = "while", tkXor = "xor",
|
||||
tkYield = "yield", # end of keywords
|
||||
|
||||
tkIntLit = "tkIntLit", tkInt8Lit = "tkInt8Lit", tkInt16Lit = "tkInt16Lit",
|
||||
tkIntLit = "tkIntLit", tkInt8Lit = "tkInt8Lit", tkInt16Lit = "tkInt16Lit",
|
||||
tkInt32Lit = "tkInt32Lit", tkInt64Lit = "tkInt64Lit",
|
||||
tkUIntLit = "tkUIntLit", tkUInt8Lit = "tkUInt8Lit", tkUInt16Lit = "tkUInt16Lit",
|
||||
tkUIntLit = "tkUIntLit", tkUInt8Lit = "tkUInt8Lit", tkUInt16Lit = "tkUInt16Lit",
|
||||
tkUInt32Lit = "tkUInt32Lit", tkUInt64Lit = "tkUInt64Lit",
|
||||
tkFloatLit = "tkFloatLit", tkFloat32Lit = "tkFloat32Lit",
|
||||
tkFloat64Lit = "tkFloat64Lit", tkFloat128Lit = "tkFloat128Lit",
|
||||
tkStrLit = "tkStrLit", tkRStrLit = "tkRStrLit", tkTripleStrLit = "tkTripleStrLit",
|
||||
tkGStrLit = "tkGStrLit", tkGTripleStrLit = "tkGTripleStrLit", tkCharLit = "tkCharLit",
|
||||
|
||||
tkGStrLit = "tkGStrLit", tkGTripleStrLit = "tkGTripleStrLit", tkCharLit = "tkCharLit",
|
||||
|
||||
tkParLe = "(", tkParRi = ")", tkBracketLe = "[",
|
||||
tkBracketRi = "]", tkCurlyLe = "{", tkCurlyRi = "}",
|
||||
tkBracketDotLe = "[.", tkBracketDotRi = ".]",
|
||||
tkCurlyDotLe = "{.", tkCurlyDotRi = ".}",
|
||||
tkParDotLe = "(.", tkParDotRi = ".)",
|
||||
tkComma = ",", tkSemiColon = ";",
|
||||
tkColon = ":", tkColonColon = "::", tkEquals = "=",
|
||||
tkColon = ":", tkColonColon = "::", tkEquals = "=",
|
||||
tkDot = ".", tkDotDot = "..", tkBracketLeColon = "[:",
|
||||
tkOpr, tkComment, tkAccent = "`",
|
||||
# these are fake tokens used by renderer.nim
|
||||
@@ -348,6 +349,14 @@ proc getNumber(L: var Lexer, result: var Token) =
|
||||
startpos = L.bufpos
|
||||
tokenBegin(result, startpos)
|
||||
|
||||
var isPositive = true
|
||||
if L.buf[L.bufpos] == '-':
|
||||
eatChar(L, result)
|
||||
isPositive = false # a leading '-' was consumed: setNumber must negate the parsed value
|
||||
|
||||
template setNumber(field, value) =
|
||||
field = (if isPositive: value else: -value)
|
||||
|
||||
# First stage: find out base, make verifications, build token literal string
|
||||
# {'c', 'C'} is added for deprecation reasons to provide a clear error message
|
||||
if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'c', 'C', 'O'}:
|
||||
@@ -459,7 +468,7 @@ proc getNumber(L: var Lexer, result: var Token) =
|
||||
|
||||
# Third stage, extract actual number
|
||||
L.bufpos = startpos # restore position
|
||||
var pos: int = startpos
|
||||
var pos = startpos
|
||||
try:
|
||||
if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
|
||||
inc(pos, 2)
|
||||
@@ -500,20 +509,20 @@ proc getNumber(L: var Lexer, result: var Token) =
|
||||
internalError(L.config, getLineInfo(L), "getNumber")
|
||||
|
||||
case result.tokType
|
||||
of tkIntLit, tkInt64Lit: result.iNumber = xi
|
||||
of tkInt8Lit: result.iNumber = ashr(xi shl 56, 56)
|
||||
of tkInt16Lit: result.iNumber = ashr(xi shl 48, 48)
|
||||
of tkInt32Lit: result.iNumber = ashr(xi shl 32, 32)
|
||||
of tkUIntLit, tkUInt64Lit: result.iNumber = xi
|
||||
of tkUInt8Lit: result.iNumber = xi and 0xff
|
||||
of tkUInt16Lit: result.iNumber = xi and 0xffff
|
||||
of tkUInt32Lit: result.iNumber = xi and 0xffffffff
|
||||
of tkIntLit, tkInt64Lit: setNumber result.iNumber, xi
|
||||
of tkInt8Lit: setNumber result.iNumber, ashr(xi shl 56, 56)
|
||||
of tkInt16Lit: setNumber result.iNumber, ashr(xi shl 48, 48)
|
||||
of tkInt32Lit: setNumber result.iNumber, ashr(xi shl 32, 32)
|
||||
of tkUIntLit, tkUInt64Lit: setNumber result.iNumber, xi
|
||||
of tkUInt8Lit: setNumber result.iNumber, xi and 0xff
|
||||
of tkUInt16Lit: setNumber result.iNumber, xi and 0xffff
|
||||
of tkUInt32Lit: setNumber result.iNumber, xi and 0xffffffff
|
||||
of tkFloat32Lit:
|
||||
result.fNumber = (cast[PFloat32](addr(xi)))[]
|
||||
setNumber result.fNumber, (cast[PFloat32](addr(xi)))[]
|
||||
# note: this code is endian neutral!
|
||||
# XXX: Test this on big endian machine!
|
||||
of tkFloat64Lit, tkFloatLit:
|
||||
result.fNumber = (cast[PFloat64](addr(xi)))[]
|
||||
setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]
|
||||
else: internalError(L.config, getLineInfo(L), "getNumber")
|
||||
|
||||
# Bounds checks. Non decimal literals are allowed to overflow the range of
|
||||
@@ -521,12 +530,13 @@ proc getNumber(L: var Lexer, result: var Token) =
|
||||
# below checks of signed sizes against uint*.high is deliberate:
|
||||
# (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
|
||||
if result.tokType notin floatTypes:
|
||||
let outOfRange = case result.tokType:
|
||||
of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
|
||||
of tkInt8Lit: (xi > BiggestInt(uint8.high))
|
||||
of tkInt16Lit: (xi > BiggestInt(uint16.high))
|
||||
of tkInt32Lit: (xi > BiggestInt(uint32.high))
|
||||
else: false
|
||||
let outOfRange =
|
||||
case result.tokType
|
||||
of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
|
||||
of tkInt8Lit: (xi > BiggestInt(uint8.high))
|
||||
of tkInt16Lit: (xi > BiggestInt(uint16.high))
|
||||
of tkInt32Lit: (xi > BiggestInt(uint32.high))
|
||||
else: false
|
||||
|
||||
if outOfRange:
|
||||
#echo "out of range num: ", result.iNumber, " vs ", xi
|
||||
@@ -557,23 +567,23 @@ proc getNumber(L: var Lexer, result: var Token) =
|
||||
raise newException(ValueError, "invalid integer: " & $result.literal)
|
||||
result.iNumber = iNumber
|
||||
|
||||
# Explicit bounds checks. Only T.high needs to be considered
|
||||
# since result.iNumber can't be negative.
|
||||
# Explicit bounds checks.
|
||||
let outOfRange =
|
||||
case result.tokType
|
||||
of tkInt8Lit: result.iNumber > int8.high
|
||||
of tkUInt8Lit: result.iNumber > BiggestInt(uint8.high)
|
||||
of tkInt16Lit: result.iNumber > int16.high
|
||||
of tkUInt16Lit: result.iNumber > BiggestInt(uint16.high)
|
||||
of tkInt32Lit: result.iNumber > int32.high
|
||||
of tkUInt32Lit: result.iNumber > BiggestInt(uint32.high)
|
||||
of tkInt8Lit: result.iNumber > int8.high or result.iNumber < int8.low
|
||||
of tkUInt8Lit: result.iNumber > BiggestInt(uint8.high) or result.iNumber < 0
|
||||
of tkInt16Lit: result.iNumber > int16.high or result.iNumber < int16.low
|
||||
of tkUInt16Lit: result.iNumber > BiggestInt(uint16.high) or result.iNumber < 0
|
||||
of tkInt32Lit: result.iNumber > int32.high or result.iNumber < int32.low
|
||||
of tkUInt32Lit: result.iNumber > BiggestInt(uint32.high) or result.iNumber < 0
|
||||
else: false
|
||||
|
||||
if outOfRange: lexMessageLitNum(L, "number out of range: '$1'", startpos)
|
||||
if outOfRange:
|
||||
lexMessageLitNum(L, "number out of range: '$1'", startpos)
|
||||
|
||||
# Promote int literal to int64? Not always necessary, but more consistent
|
||||
if result.tokType == tkIntLit:
|
||||
if result.iNumber > high(int32):
|
||||
if result.iNumber > high(int32) or result.iNumber < low(int32):
|
||||
result.tokType = tkInt64Lit
|
||||
|
||||
except ValueError:
|
||||
@@ -1278,6 +1288,19 @@ proc rawGetTok*(L: var Lexer, tok: var Token) =
|
||||
let c = L.buf[L.bufpos]
|
||||
if c in SymChars+{'_'}:
|
||||
lexMessage(L, errGenerated, "invalid token: no whitespace between number and identifier")
|
||||
of '-':
|
||||
if L.buf[L.bufpos+1] in {'0'..'9'} and
|
||||
(L.bufpos-1 == 0 or L.buf[L.bufpos-1] in UnaryMinusWhitelist):
|
||||
# x)-23 # binary minus
|
||||
# ,-23 # unary minus
|
||||
# \n-78 # unary minus? Yes.
|
||||
# =-3 # parsed as `=-` anyway
|
||||
getNumber(L, tok)
|
||||
let c = L.buf[L.bufpos]
|
||||
if c in SymChars+{'_'}:
|
||||
lexMessage(L, errGenerated, "invalid token: no whitespace between number and identifier")
|
||||
else:
|
||||
getOperator(L, tok)
|
||||
else:
|
||||
if c in OpChars:
|
||||
getOperator(L, tok)
|
||||
|
||||
@@ -299,7 +299,7 @@ proc semArrayIndex(c: PContext, n: PNode): PType =
|
||||
result = makeRangeWithStaticExpr(c, e.typ.n)
|
||||
elif e.kind in {nkIntLit..nkUInt64Lit}:
|
||||
if e.intVal < 0:
|
||||
localError(c.config, n[1].info,
|
||||
localError(c.config, n.info,
|
||||
"Array length can't be negative, but was " & $e.intVal)
|
||||
result = makeRangeType(c, 0, e.intVal-1, n.info, e.typ)
|
||||
elif e.kind == nkSym and e.typ.kind == tyStatic:
|
||||
|
||||
@@ -499,10 +499,11 @@ Numerical constants are of a single type and have the form::
|
||||
hexdigit = digit | 'A'..'F' | 'a'..'f'
|
||||
octdigit = '0'..'7'
|
||||
bindigit = '0'..'1'
|
||||
HEX_LIT = '0' ('x' | 'X' ) hexdigit ( ['_'] hexdigit )*
|
||||
DEC_LIT = digit ( ['_'] digit )*
|
||||
OCT_LIT = '0' 'o' octdigit ( ['_'] octdigit )*
|
||||
BIN_LIT = '0' ('b' | 'B' ) bindigit ( ['_'] bindigit )*
|
||||
unary_minus = '-' # See the section about unary minus
|
||||
HEX_LIT = unary_minus? '0' ('x' | 'X' ) hexdigit ( ['_'] hexdigit )*
|
||||
DEC_LIT = unary_minus? digit ( ['_'] digit )*
|
||||
OCT_LIT = unary_minus? '0' 'o' octdigit ( ['_'] octdigit )*
|
||||
BIN_LIT = unary_minus? '0' ('b' | 'B' ) bindigit ( ['_'] bindigit )*
|
||||
|
||||
INT_LIT = HEX_LIT
|
||||
| DEC_LIT
|
||||
@@ -521,7 +522,7 @@ Numerical constants are of a single type and have the form::
|
||||
UINT64_LIT = INT_LIT ['\''] ('u' | 'U') '64'
|
||||
|
||||
exponent = ('e' | 'E' ) ['+' | '-'] digit ( ['_'] digit )*
|
||||
FLOAT_LIT = digit (['_'] digit)* (('.' digit (['_'] digit)* [exponent]) |exponent)
|
||||
FLOAT_LIT = unary_minus? digit (['_'] digit)* (('.' digit (['_'] digit)* [exponent]) |exponent)
|
||||
FLOAT32_SUFFIX = ('f' | 'F') ['32']
|
||||
FLOAT32_LIT = HEX_LIT '\'' FLOAT32_SUFFIX
|
||||
| (FLOAT_LIT | DEC_LIT | OCT_LIT | BIN_LIT) ['\''] FLOAT32_SUFFIX
|
||||
@@ -535,6 +536,38 @@ for readability. Integer and floating-point literals may be given in decimal (no
|
||||
prefix), binary (prefix `0b`), octal (prefix `0o`), and hexadecimal
|
||||
(prefix `0x`) notation.
|
||||
|
||||
The fact that the unary minus `-` in a number literal like `-1` is considered
|
||||
to be part of the literal is a late addition to the language. The rationale is that
|
||||
an expression `-128'i8` should be valid and without this special case, this would
|
||||
be impossible -- `128` is not a valid `int8` value, only `-128` is.
|
||||
|
||||
For the `unary_minus` rule there are further restrictions that are not covered
|
||||
in the formal grammar. For `-` to be part of the number literal its immediately
|
||||
preceding character has to be in the
|
||||
set `{' ', '\t', '\n', '\r', ',', ';', '(', '[', '{'}`. This set was designed to
|
||||
cover most cases in a natural manner.
|
||||
|
||||
In the following examples, `-1` is a single token:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
echo -1
|
||||
echo(-1)
|
||||
echo [-1]
|
||||
echo 3,-1
|
||||
|
||||
"abc";-1
|
||||
|
||||
In the following examples, `-1` is parsed as two separate tokens (as `- 1`):
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
echo x-1
|
||||
echo (int)-1
|
||||
echo [a]-1
|
||||
"abc"-1
|
||||
|
||||
|
||||
There exists a literal for each numerical type that is
|
||||
defined. The suffix starting with an apostrophe ('\'') is called a
|
||||
`type suffix`:idx:. Literals without a type suffix are of an integer type
|
||||
|
||||
76
tests/lexer/tunary_minus.nim
Normal file
76
tests/lexer/tunary_minus.nim
Normal file
@@ -0,0 +1,76 @@
|
||||
discard """
|
||||
targets: "c cpp js"
|
||||
"""
|
||||
|
||||
# Test numeric literals and handling of minus symbol
|
||||
|
||||
import std/[macros, strutils]
|
||||
|
||||
macro lispReprStr*(a: untyped): untyped = newLit(a.lispRepr)
|
||||
|
||||
macro assertAST*(expected: string, struct: untyped): untyped =
|
||||
var ast = newLit(struct.treeRepr)
|
||||
result = quote do:
|
||||
if `ast` != `expected`:
|
||||
doAssert false, "\nGot:\n" & `ast`.indent(2) & "\nExpected:\n" & `expected`.indent(2)
|
||||
|
||||
const one = 1
|
||||
const minusOne = `-`(one)
|
||||
|
||||
# border cases that *should* generate compiler errors:
|
||||
assertAST dedent """
|
||||
StmtList
|
||||
Asgn
|
||||
Ident "x"
|
||||
Command
|
||||
IntLit 4
|
||||
IntLit -1""":
|
||||
x = 4 -1
|
||||
assertAST dedent """
|
||||
StmtList
|
||||
VarSection
|
||||
IdentDefs
|
||||
Ident "x"
|
||||
Ident "uint"
|
||||
IntLit -1""":
|
||||
var x: uint = -1
|
||||
template bad() =
|
||||
x = 4 -1
|
||||
doAssert not compiles(bad())
|
||||
|
||||
template main =
|
||||
block: # check when a minus (-) is a negative sign for a literal
|
||||
doAssert -1 == minusOne:
|
||||
"unable to parse a spaced-prefixed negative int"
|
||||
doAssert lispReprStr(-1) == """(IntLit -1)"""
|
||||
doAssert -1.0'f64 == minusOne.float64
|
||||
doAssert lispReprStr(-1.000'f64) == """(Float64Lit -1.0)"""
|
||||
doAssert lispReprStr( -1.000'f64) == """(Float64Lit -1.0)"""
|
||||
doAssert [-1].contains(minusOne):
|
||||
"unable to handle negatives after square bracket"
|
||||
doAssert lispReprStr([-1]) == """(Bracket (IntLit -1))"""
|
||||
doAssert (-1, 2)[0] == minusOne:
|
||||
"unable to handle negatives after parenthesis"
|
||||
doAssert lispReprStr((-1, 2)) == """(Par (IntLit -1) (IntLit 2))"""
|
||||
proc x(): int =
|
||||
var a = 1;-1 # the -1 should act as the return value
|
||||
doAssert x() == minusOne:
|
||||
"unable to handle negatives after semi-colon"
|
||||
|
||||
block: # check when a minus (-) is an unary op
|
||||
doAssert -one == minusOne:
|
||||
"unable to a negative prior to identifier"
|
||||
|
||||
block: # check when a minus (-) is a subtraction op
|
||||
doAssert 4-1 == 3:
|
||||
"unable to handle subtraction sans surrounding spaces with a numeric literal"
|
||||
doAssert 4-one == 3:
|
||||
"unable to handle subtraction sans surrounding spaces with an identifier"
|
||||
doAssert 4 - 1 == 3:
|
||||
"unable to handle subtraction with surrounding spaces with a numeric literal"
|
||||
doAssert 4 - one == 3:
|
||||
"unable to handle subtraction with surrounding spaces with an identifier"
|
||||
|
||||
|
||||
static: main()
|
||||
main()
|
||||
Reference in New Issue
Block a user