mirror of
https://github.com/nim-lang/Nim.git
synced 2026-06-06 03:44:14 +00:00
custom integer literals (#17489)
* user defined integer literals; refs #17020 * updated renderer.nim * use mlexerutils helper * imported all test cases from https://github.com/nim-lang/Nim/pull/17020 * final grammar updated
This commit is contained in:
@@ -266,6 +266,8 @@
|
||||
- The unary minus in `-1` is now part of the integer literal; it is parsed as a single token.
|
||||
This implies that edge cases like `-128'i8` finally work correctly.
|
||||
|
||||
- Custom numeric literals are now supported.
|
||||
|
||||
|
||||
## Compiler changes
|
||||
|
||||
|
||||
@@ -416,7 +416,7 @@ proc nodeToHighlightedHtml(d: PDoc; n: PNode; result: var Rope; renderFlags: TRe
|
||||
of tkOpr:
|
||||
dispA(d.conf, result, "<span class=\"Operator\">$1</span>", "\\spanOperator{$1}",
|
||||
[escLit])
|
||||
of tkStrLit..tkTripleStrLit:
|
||||
of tkStrLit..tkTripleStrLit, tkCustomLit:
|
||||
dispA(d.conf, result, "<span class=\"StringLit\">$1</span>",
|
||||
"\\spanStringLit{$1}", [escLit])
|
||||
of tkCharLit:
|
||||
|
||||
@@ -60,6 +60,7 @@ type
|
||||
tkFloat64Lit = "tkFloat64Lit", tkFloat128Lit = "tkFloat128Lit",
|
||||
tkStrLit = "tkStrLit", tkRStrLit = "tkRStrLit", tkTripleStrLit = "tkTripleStrLit",
|
||||
tkGStrLit = "tkGStrLit", tkGTripleStrLit = "tkGTripleStrLit", tkCharLit = "tkCharLit",
|
||||
tkCustomLit = "tkCustomLit",
|
||||
|
||||
tkParLe = "(", tkParRi = ")", tkBracketLe = "[",
|
||||
tkBracketRi = "]", tkCurlyLe = "{", tkCurlyRi = "}",
|
||||
@@ -313,8 +314,7 @@ proc getNumber(L: var Lexer, result: var Token) =
|
||||
|
||||
proc lexMessageLitNum(L: var Lexer, msg: string, startpos: int, msgKind = errGenerated) =
|
||||
# Used to get slightly human friendlier err messages.
|
||||
const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O',
|
||||
'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'}
|
||||
const literalishChars = {'A'..'Z', 'a'..'z', '0'..'9', '_', '.', '\''}
|
||||
var msgPos = L.bufpos
|
||||
var t: Token
|
||||
t.literal = ""
|
||||
@@ -326,15 +326,14 @@ proc getNumber(L: var Lexer, result: var Token) =
|
||||
t.literal.add(L.buf[L.bufpos])
|
||||
inc(L.bufpos)
|
||||
matchChars(L, t, literalishChars)
|
||||
if L.buf[L.bufpos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
|
||||
inc(L.bufpos)
|
||||
if L.buf[L.bufpos] in literalishChars:
|
||||
t.literal.add(L.buf[L.bufpos])
|
||||
inc(L.bufpos)
|
||||
matchChars(L, t, {'0'..'9'})
|
||||
L.bufpos = msgPos
|
||||
lexMessage(L, msgKind, msg % t.literal)
|
||||
|
||||
var
|
||||
startpos, endpos: int
|
||||
xi: BiggestInt
|
||||
isBase10 = true
|
||||
numDigits = 0
|
||||
@@ -346,7 +345,7 @@ proc getNumber(L: var Lexer, result: var Token) =
|
||||
result.tokType = tkIntLit # int literal until we know better
|
||||
result.literal = ""
|
||||
result.base = base10
|
||||
startpos = L.bufpos
|
||||
let startpos = L.bufpos
|
||||
tokenBegin(result, startpos)
|
||||
|
||||
var isPositive = true
|
||||
@@ -395,201 +394,187 @@ proc getNumber(L: var Lexer, result: var Token) =
|
||||
discard matchUnderscoreChars(L, result, {'0'..'9'})
|
||||
if L.buf[L.bufpos] in {'e', 'E'}:
|
||||
result.tokType = tkFloatLit
|
||||
eatChar(L, result, 'e')
|
||||
eatChar(L, result)
|
||||
if L.buf[L.bufpos] in {'+', '-'}:
|
||||
eatChar(L, result)
|
||||
discard matchUnderscoreChars(L, result, {'0'..'9'})
|
||||
endpos = L.bufpos
|
||||
let endpos = L.bufpos
|
||||
|
||||
# Second stage, find out if there's a datatype suffix and handle it
|
||||
var postPos = endpos
|
||||
|
||||
if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
|
||||
let errPos = postPos
|
||||
var customLitPossible = false
|
||||
if L.buf[postPos] == '\'':
|
||||
inc(postPos)
|
||||
customLitPossible = true
|
||||
|
||||
case L.buf[postPos]
|
||||
of 'f', 'F':
|
||||
inc(postPos)
|
||||
if (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
|
||||
result.tokType = tkFloat32Lit
|
||||
inc(postPos, 2)
|
||||
elif (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
|
||||
result.tokType = tkFloat64Lit
|
||||
inc(postPos, 2)
|
||||
elif (L.buf[postPos] == '1') and
|
||||
(L.buf[postPos + 1] == '2') and
|
||||
(L.buf[postPos + 2] == '8'):
|
||||
result.tokType = tkFloat128Lit
|
||||
inc(postPos, 3)
|
||||
else: # "f" alone defaults to float32
|
||||
result.tokType = tkFloat32Lit
|
||||
of 'd', 'D': # ad hoc convenience shortcut for f64
|
||||
inc(postPos)
|
||||
result.tokType = tkFloat64Lit
|
||||
of 'i', 'I':
|
||||
inc(postPos)
|
||||
if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
|
||||
result.tokType = tkInt64Lit
|
||||
inc(postPos, 2)
|
||||
elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
|
||||
result.tokType = tkInt32Lit
|
||||
inc(postPos, 2)
|
||||
elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
|
||||
result.tokType = tkInt16Lit
|
||||
inc(postPos, 2)
|
||||
elif (L.buf[postPos] == '8'):
|
||||
result.tokType = tkInt8Lit
|
||||
inc(postPos)
|
||||
if L.buf[postPos] in SymChars:
|
||||
var suffixAsLower = newStringOfCap(10)
|
||||
var suffix = newStringOfCap(10)
|
||||
while true:
|
||||
let c = L.buf[postPos]
|
||||
suffix.add c
|
||||
suffixAsLower.add toLowerAscii(c)
|
||||
inc postPos
|
||||
if L.buf[postPos] notin SymChars+{'_'}: break
|
||||
case suffix
|
||||
of "f", "f32": result.tokType = tkFloat32Lit
|
||||
of "d", "f64": result.tokType = tkFloat64Lit
|
||||
of "f128": result.tokType = tkFloat128Lit
|
||||
of "i8": result.tokType = tkInt8Lit
|
||||
of "i16": result.tokType = tkInt16Lit
|
||||
of "i32": result.tokType = tkInt32Lit
|
||||
of "i64": result.tokType = tkInt64Lit
|
||||
of "u": result.tokType = tkUIntLit
|
||||
of "u8": result.tokType = tkUInt8Lit
|
||||
of "u16": result.tokType = tkUInt16Lit
|
||||
of "u32": result.tokType = tkUInt32Lit
|
||||
of "u64": result.tokType = tkUInt64Lit
|
||||
else:
|
||||
lexMessageLitNum(L, "invalid number: '$1'", startpos)
|
||||
of 'u', 'U':
|
||||
inc(postPos)
|
||||
if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
|
||||
result.tokType = tkUInt64Lit
|
||||
inc(postPos, 2)
|
||||
elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
|
||||
result.tokType = tkUInt32Lit
|
||||
inc(postPos, 2)
|
||||
elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
|
||||
result.tokType = tkUInt16Lit
|
||||
inc(postPos, 2)
|
||||
elif (L.buf[postPos] == '8'):
|
||||
result.tokType = tkUInt8Lit
|
||||
inc(postPos)
|
||||
else:
|
||||
result.tokType = tkUIntLit
|
||||
if customLitPossible:
|
||||
# remember the position of the ``'`` so that the parser doesn't
|
||||
# have to reparse the custom literal:
|
||||
result.iNumber = len(result.literal)
|
||||
result.literal.add '\''
|
||||
result.literal.add suffix
|
||||
result.tokType = tkCustomLit
|
||||
else:
|
||||
lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
|
||||
else:
|
||||
lexMessageLitNum(L, "invalid number: '$1'", startpos)
|
||||
lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
|
||||
|
||||
# Is there still a literalish char awaiting? Then it's an error!
|
||||
if L.buf[postPos] in literalishChars or
|
||||
(L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}):
|
||||
lexMessageLitNum(L, "invalid number: '$1'", startpos)
|
||||
|
||||
# Third stage, extract actual number
|
||||
L.bufpos = startpos # restore position
|
||||
var pos = startpos
|
||||
try:
|
||||
if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
|
||||
inc(pos, 2)
|
||||
xi = 0 # it is a base prefix
|
||||
if result.tokType != tkCustomLit:
|
||||
# Third stage, extract actual number
|
||||
L.bufpos = startpos # restore position
|
||||
var pos = startpos
|
||||
try:
|
||||
if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
|
||||
inc(pos, 2)
|
||||
xi = 0 # it is a base prefix
|
||||
|
||||
case L.buf[pos - 1]
|
||||
of 'b', 'B':
|
||||
result.base = base2
|
||||
while pos < endpos:
|
||||
if L.buf[pos] != '_':
|
||||
xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
|
||||
inc(pos)
|
||||
# 'c', 'C' is deprecated
|
||||
of 'o', 'c', 'C':
|
||||
result.base = base8
|
||||
while pos < endpos:
|
||||
if L.buf[pos] != '_':
|
||||
xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
|
||||
inc(pos)
|
||||
of 'x', 'X':
|
||||
result.base = base16
|
||||
while pos < endpos:
|
||||
case L.buf[pos]
|
||||
of '_':
|
||||
case L.buf[pos - 1]
|
||||
of 'b', 'B':
|
||||
result.base = base2
|
||||
while pos < endpos:
|
||||
if L.buf[pos] != '_':
|
||||
xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
|
||||
inc(pos)
|
||||
of '0'..'9':
|
||||
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
|
||||
# 'c', 'C' is deprecated
|
||||
of 'o', 'c', 'C':
|
||||
result.base = base8
|
||||
while pos < endpos:
|
||||
if L.buf[pos] != '_':
|
||||
xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
|
||||
inc(pos)
|
||||
of 'a'..'f':
|
||||
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
|
||||
inc(pos)
|
||||
of 'A'..'F':
|
||||
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
|
||||
inc(pos)
|
||||
else:
|
||||
break
|
||||
of 'x', 'X':
|
||||
result.base = base16
|
||||
while pos < endpos:
|
||||
case L.buf[pos]
|
||||
of '_':
|
||||
inc(pos)
|
||||
of '0'..'9':
|
||||
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
|
||||
inc(pos)
|
||||
of 'a'..'f':
|
||||
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
|
||||
inc(pos)
|
||||
of 'A'..'F':
|
||||
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
|
||||
inc(pos)
|
||||
else:
|
||||
break
|
||||
else:
|
||||
internalError(L.config, getLineInfo(L), "getNumber")
|
||||
|
||||
case result.tokType
|
||||
of tkIntLit, tkInt64Lit: setNumber result.iNumber, xi
|
||||
of tkInt8Lit: setNumber result.iNumber, ashr(xi shl 56, 56)
|
||||
of tkInt16Lit: setNumber result.iNumber, ashr(xi shl 48, 48)
|
||||
of tkInt32Lit: setNumber result.iNumber, ashr(xi shl 32, 32)
|
||||
of tkUIntLit, tkUInt64Lit: setNumber result.iNumber, xi
|
||||
of tkUInt8Lit: setNumber result.iNumber, xi and 0xff
|
||||
of tkUInt16Lit: setNumber result.iNumber, xi and 0xffff
|
||||
of tkUInt32Lit: setNumber result.iNumber, xi and 0xffffffff
|
||||
of tkFloat32Lit:
|
||||
setNumber result.fNumber, (cast[PFloat32](addr(xi)))[]
|
||||
# note: this code is endian neutral!
|
||||
# XXX: Test this on big endian machine!
|
||||
of tkFloat64Lit, tkFloatLit:
|
||||
setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]
|
||||
else: internalError(L.config, getLineInfo(L), "getNumber")
|
||||
|
||||
# Bounds checks. Non decimal literals are allowed to overflow the range of
|
||||
# the datatype as long as their pattern don't overflow _bitwise_, hence
|
||||
# below checks of signed sizes against uint*.high is deliberate:
|
||||
# (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
|
||||
if result.tokType notin floatTypes:
|
||||
let outOfRange =
|
||||
case result.tokType
|
||||
of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
|
||||
of tkInt8Lit: (xi > BiggestInt(uint8.high))
|
||||
of tkInt16Lit: (xi > BiggestInt(uint16.high))
|
||||
of tkInt32Lit: (xi > BiggestInt(uint32.high))
|
||||
else: false
|
||||
|
||||
if outOfRange:
|
||||
#echo "out of range num: ", result.iNumber, " vs ", xi
|
||||
lexMessageLitNum(L, "number out of range: '$1'", startpos)
|
||||
|
||||
else:
|
||||
internalError(L.config, getLineInfo(L), "getNumber")
|
||||
case result.tokType
|
||||
of floatTypes:
|
||||
result.fNumber = parseFloat(result.literal)
|
||||
of tkUInt64Lit, tkUIntLit:
|
||||
var iNumber: uint64
|
||||
var len: int
|
||||
try:
|
||||
len = parseBiggestUInt(result.literal, iNumber)
|
||||
except ValueError:
|
||||
raise newException(OverflowDefect, "number out of range: " & $result.literal)
|
||||
if len != result.literal.len:
|
||||
raise newException(ValueError, "invalid integer: " & $result.literal)
|
||||
result.iNumber = cast[int64](iNumber)
|
||||
else:
|
||||
var iNumber: int64
|
||||
var len: int
|
||||
try:
|
||||
len = parseBiggestInt(result.literal, iNumber)
|
||||
except ValueError:
|
||||
raise newException(OverflowDefect, "number out of range: " & $result.literal)
|
||||
if len != result.literal.len:
|
||||
raise newException(ValueError, "invalid integer: " & $result.literal)
|
||||
result.iNumber = iNumber
|
||||
|
||||
case result.tokType
|
||||
of tkIntLit, tkInt64Lit: setNumber result.iNumber, xi
|
||||
of tkInt8Lit: setNumber result.iNumber, ashr(xi shl 56, 56)
|
||||
of tkInt16Lit: setNumber result.iNumber, ashr(xi shl 48, 48)
|
||||
of tkInt32Lit: setNumber result.iNumber, ashr(xi shl 32, 32)
|
||||
of tkUIntLit, tkUInt64Lit: setNumber result.iNumber, xi
|
||||
of tkUInt8Lit: setNumber result.iNumber, xi and 0xff
|
||||
of tkUInt16Lit: setNumber result.iNumber, xi and 0xffff
|
||||
of tkUInt32Lit: setNumber result.iNumber, xi and 0xffffffff
|
||||
of tkFloat32Lit:
|
||||
setNumber result.fNumber, (cast[PFloat32](addr(xi)))[]
|
||||
# note: this code is endian neutral!
|
||||
# XXX: Test this on big endian machine!
|
||||
of tkFloat64Lit, tkFloatLit:
|
||||
setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]
|
||||
else: internalError(L.config, getLineInfo(L), "getNumber")
|
||||
|
||||
# Bounds checks. Non decimal literals are allowed to overflow the range of
|
||||
# the datatype as long as their pattern don't overflow _bitwise_, hence
|
||||
# below checks of signed sizes against uint*.high is deliberate:
|
||||
# (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
|
||||
if result.tokType notin floatTypes:
|
||||
# Explicit bounds checks.
|
||||
let outOfRange =
|
||||
case result.tokType
|
||||
of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
|
||||
of tkInt8Lit: (xi > BiggestInt(uint8.high))
|
||||
of tkInt16Lit: (xi > BiggestInt(uint16.high))
|
||||
of tkInt32Lit: (xi > BiggestInt(uint32.high))
|
||||
of tkInt8Lit: result.iNumber > int8.high or result.iNumber < int8.low
|
||||
of tkUInt8Lit: result.iNumber > BiggestInt(uint8.high) or result.iNumber < 0
|
||||
of tkInt16Lit: result.iNumber > int16.high or result.iNumber < int16.low
|
||||
of tkUInt16Lit: result.iNumber > BiggestInt(uint16.high) or result.iNumber < 0
|
||||
of tkInt32Lit: result.iNumber > int32.high or result.iNumber < int32.low
|
||||
of tkUInt32Lit: result.iNumber > BiggestInt(uint32.high) or result.iNumber < 0
|
||||
else: false
|
||||
|
||||
if outOfRange:
|
||||
#echo "out of range num: ", result.iNumber, " vs ", xi
|
||||
lexMessageLitNum(L, "number out of range: '$1'", startpos)
|
||||
|
||||
else:
|
||||
case result.tokType
|
||||
of floatTypes:
|
||||
result.fNumber = parseFloat(result.literal)
|
||||
of tkUInt64Lit, tkUIntLit:
|
||||
var iNumber: uint64
|
||||
var len: int
|
||||
try:
|
||||
len = parseBiggestUInt(result.literal, iNumber)
|
||||
except ValueError:
|
||||
raise newException(OverflowDefect, "number out of range: " & $result.literal)
|
||||
if len != result.literal.len:
|
||||
raise newException(ValueError, "invalid integer: " & $result.literal)
|
||||
result.iNumber = cast[int64](iNumber)
|
||||
else:
|
||||
var iNumber: int64
|
||||
var len: int
|
||||
try:
|
||||
len = parseBiggestInt(result.literal, iNumber)
|
||||
except ValueError:
|
||||
raise newException(OverflowDefect, "number out of range: " & $result.literal)
|
||||
if len != result.literal.len:
|
||||
raise newException(ValueError, "invalid integer: " & $result.literal)
|
||||
result.iNumber = iNumber
|
||||
# Promote int literal to int64? Not always necessary, but more consistent
|
||||
if result.tokType == tkIntLit:
|
||||
if result.iNumber > high(int32) or result.iNumber < low(int32):
|
||||
result.tokType = tkInt64Lit
|
||||
|
||||
# Explicit bounds checks.
|
||||
let outOfRange =
|
||||
case result.tokType
|
||||
of tkInt8Lit: result.iNumber > int8.high or result.iNumber < int8.low
|
||||
of tkUInt8Lit: result.iNumber > BiggestInt(uint8.high) or result.iNumber < 0
|
||||
of tkInt16Lit: result.iNumber > int16.high or result.iNumber < int16.low
|
||||
of tkUInt16Lit: result.iNumber > BiggestInt(uint16.high) or result.iNumber < 0
|
||||
of tkInt32Lit: result.iNumber > int32.high or result.iNumber < int32.low
|
||||
of tkUInt32Lit: result.iNumber > BiggestInt(uint32.high) or result.iNumber < 0
|
||||
else: false
|
||||
|
||||
if outOfRange:
|
||||
lexMessageLitNum(L, "number out of range: '$1'", startpos)
|
||||
|
||||
# Promote int literal to int64? Not always necessary, but more consistent
|
||||
if result.tokType == tkIntLit:
|
||||
if result.iNumber > high(int32) or result.iNumber < low(int32):
|
||||
result.tokType = tkInt64Lit
|
||||
|
||||
except ValueError:
|
||||
lexMessageLitNum(L, "invalid number: '$1'", startpos)
|
||||
except OverflowDefect, RangeDefect:
|
||||
lexMessageLitNum(L, "number out of range: '$1'", startpos)
|
||||
except ValueError:
|
||||
lexMessageLitNum(L, "invalid number: '$1'", startpos)
|
||||
except OverflowDefect, RangeDefect:
|
||||
lexMessageLitNum(L, "number out of range: '$1'", startpos)
|
||||
tokenEnd(result, postPos-1)
|
||||
L.bufpos = postPos
|
||||
|
||||
@@ -830,8 +815,9 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) =
|
||||
inc(pos)
|
||||
L.bufpos = pos
|
||||
|
||||
proc getCharacter(L: var Lexer, tok: var Token) =
|
||||
proc getCharacter(L: var Lexer; tok: var Token) =
|
||||
tokenBegin(tok, L.bufpos)
|
||||
let startPos = L.bufpos
|
||||
inc(L.bufpos) # skip '
|
||||
var c = L.buf[L.bufpos]
|
||||
case c
|
||||
@@ -842,10 +828,16 @@ proc getCharacter(L: var Lexer, tok: var Token) =
|
||||
else:
|
||||
tok.literal = $c
|
||||
inc(L.bufpos)
|
||||
if L.buf[L.bufpos] != '\'':
|
||||
lexMessage(L, errGenerated, "missing closing ' for character literal")
|
||||
tokenEndIgnore(tok, L.bufpos)
|
||||
inc(L.bufpos) # skip '
|
||||
if L.buf[L.bufpos] == '\'':
|
||||
tokenEndIgnore(tok, L.bufpos)
|
||||
inc(L.bufpos) # skip '
|
||||
else:
|
||||
if startPos > 0 and L.buf[startPos-1] == '`':
|
||||
tok.literal = "'"
|
||||
L.bufpos = startPos+1
|
||||
else:
|
||||
lexMessage(L, errGenerated, "missing closing ' for character literal")
|
||||
tokenEndIgnore(tok, L.bufpos)
|
||||
|
||||
proc getSymbol(L: var Lexer, tok: var Token) =
|
||||
var h: Hash = 0
|
||||
|
||||
@@ -355,7 +355,7 @@ proc parseSymbol(p: var Parser, mode = smNormal): PNode =
|
||||
let node = newNodeI(nkIdent, lineinfo)
|
||||
node.ident = p.lex.cache.getIdent(accm)
|
||||
result.add(node)
|
||||
of tokKeywordLow..tokKeywordHigh, tkSymbol, tkIntLit..tkCharLit:
|
||||
of tokKeywordLow..tokKeywordHigh, tkSymbol, tkIntLit..tkCustomLit:
|
||||
result.add(newIdentNodeP(p.lex.cache.getIdent($p.tok), p))
|
||||
getTok(p)
|
||||
else:
|
||||
@@ -627,7 +627,7 @@ proc identOrLiteral(p: var Parser, mode: PrimaryMode): PNode =
|
||||
#| | UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
|
||||
#| | FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
|
||||
#| | STR_LIT | RSTR_LIT | TRIPLESTR_LIT
|
||||
#| | CHAR_LIT
|
||||
#| | CHAR_LIT | CUSTOM_NUMERIC_LIT
|
||||
#| | NIL
|
||||
#| generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
|
||||
#| identOrLiteral = generalizedLit | symbol | literal
|
||||
@@ -710,6 +710,14 @@ proc identOrLiteral(p: var Parser, mode: PrimaryMode): PNode =
|
||||
of tkCharLit:
|
||||
result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p)
|
||||
getTok(p)
|
||||
of tkCustomLit:
|
||||
let splitPos = p.tok.iNumber.int
|
||||
let str = newStrNodeP(nkRStrLit, p.tok.literal.substr(0, splitPos-1), p)
|
||||
let callee = newIdentNodeP(getIdent(p.lex.cache, p.tok.literal.substr(splitPos)), p)
|
||||
result = newNodeP(nkDotExpr, p)
|
||||
result.add str
|
||||
result.add callee
|
||||
getTok(p)
|
||||
of tkNil:
|
||||
result = newNodeP(nkNilLit, p)
|
||||
getTok(p)
|
||||
@@ -807,7 +815,7 @@ proc primarySuffix(p: var Parser, r: PNode,
|
||||
result = commandExpr(p, result, mode)
|
||||
break
|
||||
result = namedParams(p, result, nkCurlyExpr, tkCurlyRi)
|
||||
of tkSymbol, tkAccent, tkIntLit..tkCharLit, tkNil, tkCast,
|
||||
of tkSymbol, tkAccent, tkIntLit..tkCustomLit, tkNil, tkCast,
|
||||
tkOpr, tkDotDot, tkVar, tkOut, tkStatic, tkType, tkEnum, tkTuple,
|
||||
tkObject, tkProc:
|
||||
# XXX: In type sections we allow the free application of the
|
||||
@@ -1097,7 +1105,7 @@ proc isExprStart(p: Parser): bool =
|
||||
case p.tok.tokType
|
||||
of tkSymbol, tkAccent, tkOpr, tkNot, tkNil, tkCast, tkIf, tkFor,
|
||||
tkProc, tkFunc, tkIterator, tkBind, tkBuiltInMagics,
|
||||
tkParLe, tkBracketLe, tkCurlyLe, tkIntLit..tkCharLit, tkVar, tkRef, tkPtr,
|
||||
tkParLe, tkBracketLe, tkCurlyLe, tkIntLit..tkCustomLit, tkVar, tkRef, tkPtr,
|
||||
tkTuple, tkObject, tkWhen, tkCase, tkOut:
|
||||
result = true
|
||||
else: result = false
|
||||
@@ -1498,7 +1506,7 @@ proc parseReturnOrRaise(p: var Parser, kind: TNodeKind): PNode =
|
||||
#| yieldStmt = 'yield' optInd expr?
|
||||
#| discardStmt = 'discard' optInd expr?
|
||||
#| breakStmt = 'break' optInd expr?
|
||||
#| continueStmt = 'break' optInd expr?
|
||||
#| continueStmt = 'continue' optInd expr?
|
||||
result = newNodeP(kind, p)
|
||||
getTok(p)
|
||||
if p.tok.tokType == tkComment:
|
||||
|
||||
@@ -942,7 +942,7 @@ proc skipHiddenNodes(n: PNode): PNode =
|
||||
else: break
|
||||
|
||||
proc accentedName(g: var TSrcGen, n: PNode) =
|
||||
const backticksNeeded = OpChars + {'[', '{'}
|
||||
const backticksNeeded = OpChars + {'[', '{', '\''}
|
||||
if n == nil: return
|
||||
let isOperator =
|
||||
if n.kind == nkIdent and n.ident.s.len > 0 and n.ident.s[0] in backticksNeeded: true
|
||||
@@ -976,6 +976,11 @@ proc infixArgument(g: var TSrcGen, n: PNode, i: int) =
|
||||
if needsParenthesis:
|
||||
put(g, tkParRi, ")")
|
||||
|
||||
proc isCustomLit(n: PNode): bool =
  ## Reports whether `n` has the shape the parser produces for a custom
  ## numeric literal: exactly two children, the first a raw string literal
  ## and the second an identifier or symbol whose name starts with `'`.
  ##
  ## Returns false for every other node shape.
  # The length and nkRStrLit checks must guard BOTH callee alternatives.
  # `and` binds tighter than `or`, so an unparenthesized
  # `a and b and identCase or symCase` would evaluate the nkSym alternative
  # without the guards: `n[1]` could be indexed on a one-child node, and a
  # dot expression whose left side is not a string literal could be accepted.
  if n.len == 2 and n[0].kind == nkRStrLit:
    let callee = n[1]
    result =
      case callee.kind
      of nkIdent: callee.ident.s.startsWith('\'')
      of nkSym: callee.sym.name.s.startsWith('\'')
      else: false
|
||||
|
||||
proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
|
||||
if isNil(n): return
|
||||
var
|
||||
@@ -1195,9 +1200,14 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
|
||||
gcomma(g, n, c)
|
||||
put(g, tkBracketRi, "]")
|
||||
of nkDotExpr:
|
||||
gsub(g, n, 0)
|
||||
put(g, tkDot, ".")
|
||||
gsub(g, n, 1)
|
||||
if isCustomLit(n):
|
||||
put(g, tkCustomLit, n[0].strVal)
|
||||
gsub(g, n, 1)
|
||||
else:
|
||||
gsub(g, n, 0)
|
||||
put(g, tkDot, ".")
|
||||
if n.len > 1:
|
||||
accentedName(g, n[1])
|
||||
of nkBind:
|
||||
putWithSpace(g, tkBind, "bind")
|
||||
gsub(g, n, 0)
|
||||
|
||||
@@ -1524,7 +1524,7 @@ proc semProcAnnotation(c: PContext, prc: PNode;
|
||||
return
|
||||
|
||||
proc semInferredLambda(c: PContext, pt: TIdTable, n: PNode): PNode {.nosinks.} =
|
||||
## used for resolving 'auto' in lambdas based on their callsite
|
||||
## used for resolving 'auto' in lambdas based on their callsite
|
||||
var n = n
|
||||
let original = n[namePos].sym
|
||||
let s = original #copySym(original, false)
|
||||
|
||||
@@ -46,7 +46,7 @@ literal = | INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT
|
||||
| UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
|
||||
| FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
|
||||
| STR_LIT | RSTR_LIT | TRIPLESTR_LIT
|
||||
| CHAR_LIT
|
||||
| CHAR_LIT | CUSTOM_NUMERIC_LIT
|
||||
| NIL
|
||||
generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
|
||||
identOrLiteral = generalizedLit | symbol | literal
|
||||
@@ -100,6 +100,7 @@ postExprBlocks = ':' stmt? ( IND{=} doBlock
|
||||
| IND{=} 'of' exprList ':' stmt
|
||||
| IND{=} 'elif' expr ':' stmt
|
||||
| IND{=} 'except' exprList ':' stmt
|
||||
| IND{=} 'finally' ':' stmt
|
||||
| IND{=} 'else' ':' stmt )*
|
||||
exprStmt = simpleExpr
|
||||
(( '=' optInd expr colonBody? )
|
||||
|
||||
@@ -490,11 +490,17 @@ this. Another reason is that Nim can thus support `array[char, int]` or
|
||||
type is used for Unicode characters, it can represent any Unicode character.
|
||||
`Rune` is declared in the `unicode module <unicode.html>`_.
|
||||
|
||||
A character literal that does not end in ``'`` is interpreted as ``'`` if there
|
||||
is a preceding backtick token. There must be no whitespace between the preceding
|
||||
backtick token and the character literal. This special rule ensures that a declaration
|
||||
like ``proc `'customLiteral`(s: string)`` is valid. See also
|
||||
`Custom Numeric Literals <#custom-numeric-literals>`_.
|
||||
|
||||
Numerical constants
|
||||
-------------------
|
||||
|
||||
Numerical constants are of a single type and have the form::
|
||||
Numeric Literals
|
||||
----------------
|
||||
|
||||
Numeric literals have the form::
|
||||
|
||||
hexdigit = digit | 'A'..'F' | 'a'..'f'
|
||||
octdigit = '0'..'7'
|
||||
@@ -530,8 +536,10 @@ Numerical constants are of a single type and have the form::
|
||||
FLOAT64_LIT = HEX_LIT '\'' FLOAT64_SUFFIX
|
||||
| (FLOAT_LIT | DEC_LIT | OCT_LIT | BIN_LIT) ['\''] FLOAT64_SUFFIX
|
||||
|
||||
CUSTOM_NUMERIC_LIT = (FLOAT_LIT | DEC_LIT | OCT_LIT | BIN_LIT) '\'' CUSTOM_NUMERIC_SUFFIX
|
||||
|
||||
As can be seen in the productions, numerical constants can contain underscores
|
||||
|
||||
As can be seen in the productions, numeric literals can contain underscores
|
||||
for readability. Integer and floating-point literals may be given in decimal (no
|
||||
prefix), binary (prefix `0b`), octal (prefix `0o`), and hexadecimal
|
||||
(prefix `0x`) notation.
|
||||
@@ -579,7 +587,7 @@ is optional if it is not ambiguous (only hexadecimal floating-point literals
|
||||
with a type suffix can be ambiguous).
|
||||
|
||||
|
||||
The type suffixes are:
|
||||
The pre-defined type suffixes are:
|
||||
|
||||
================= =========================
|
||||
Type Suffix Resulting type of literal
|
||||
@@ -611,6 +619,42 @@ the bit width of the datatype, it is accepted.
|
||||
Hence: 0b10000000'u8 == 0x80'u8 == 128, but, 0b10000000'i8 == 0x80'i8 == -128
|
||||
instead of causing an overflow error.
|
||||
|
||||
|
||||
Custom Numeric Literals
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
If the suffix is not predefined, then the suffix is assumed to be a call
|
||||
to a proc, template, macro or other callable identifier that is passed the
|
||||
string containing the literal. The callable identifier needs to be declared
|
||||
with a special ``'`` prefix:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
import strutils
|
||||
type u4 = distinct uint8 # a 4-bit unsigned integer aka "nibble"
|
||||
proc `'u4`(n: string): u4 =
|
||||
# The leading ' is required.
|
||||
result = (parseInt(n) and 0x0F).u4
|
||||
|
||||
var x = 5'u4
|
||||
|
||||
More formally, a custom numeric literal `123'custom` is transformed
|
||||
to r"123".`'custom` in the parsing step. There is no AST node kind that
|
||||
corresponds to this transformation. The transformation naturally handles
|
||||
the case that additional parameters are passed to the callee:
|
||||
|
||||
.. code-block:: nim
|
||||
|
||||
import strutils
|
||||
type u4 = distinct uint8 # a 4-bit unsigned integer aka "nibble"
|
||||
proc `'u4`(n: string; moreData: int): u4 =
|
||||
result = (parseInt(n) and 0x0F).u4
|
||||
|
||||
var x = 5'u4(123)
|
||||
|
||||
Custom numeric literals are covered by the grammar rule named `CUSTOM_NUMERIC_LIT`.
|
||||
|
||||
|
||||
Operators
|
||||
---------
|
||||
|
||||
|
||||
9
tests/lexer/mlexerutils.nim
Normal file
9
tests/lexer/mlexerutils.nim
Normal file
@@ -0,0 +1,9 @@
|
||||
import macros
|
||||
|
||||
# Expands to a string literal holding the `lispRepr` rendering of the
# untyped argument `a`, so tests can compare the parsed AST as text.
macro lispReprStr*(a: untyped): untyped = newLit(a.lispRepr)
|
||||
|
||||
# Captures the `treeRepr` of the untyped block `struct` as a string literal
# and generates code that compares it with `expected`; on mismatch the
# generated code fails via `doAssert false`, printing both renderings.
# NOTE(review): `indent` is called in the generated code and is not imported
# here — presumably `strutils.indent` must be visible at the call site; confirm.
macro assertAST*(expected: string, struct: untyped): untyped =
|
||||
var ast = newLit(struct.treeRepr)
|
||||
result = quote do:
|
||||
if `ast` != `expected`:
|
||||
doAssert false, "\nGot:\n" & `ast`.indent(2) & "\nExpected:\n" & `expected`.indent(2)
|
||||
150
tests/lexer/tcustom_numeric_literals.nim
Normal file
150
tests/lexer/tcustom_numeric_literals.nim
Normal file
@@ -0,0 +1,150 @@
|
||||
discard """
|
||||
targets: "c cpp js"
|
||||
"""
|
||||
|
||||
# Test tkCustomLit (custom numeric literals)
|
||||
|
||||
import std/[macros, strutils]
|
||||
import mlexerutils
|
||||
|
||||
# AST checks
|
||||
|
||||
assertAST dedent """
|
||||
StmtList
|
||||
ProcDef
|
||||
AccQuoted
|
||||
Ident "\'"
|
||||
Ident "wrap"
|
||||
Empty
|
||||
Empty
|
||||
FormalParams
|
||||
Ident "string"
|
||||
IdentDefs
|
||||
Ident "number"
|
||||
Ident "string"
|
||||
Empty
|
||||
Empty
|
||||
Empty
|
||||
StmtList
|
||||
Asgn
|
||||
Ident "result"
|
||||
Infix
|
||||
Ident "&"
|
||||
Infix
|
||||
Ident "&"
|
||||
StrLit "[["
|
||||
Ident "number"
|
||||
StrLit "]]"""":
|
||||
proc `'wrap`(number: string): string =
|
||||
result = "[[" & number & "]]"
|
||||
|
||||
assertAST dedent """
|
||||
StmtList
|
||||
DotExpr
|
||||
RStrLit "-38383839292839283928392839283928392839283.928493849385935898243e-50000"
|
||||
Ident "\'wrap"""":
|
||||
-38383839292839283928392839283928392839283.928493849385935898243e-50000'wrap
|
||||
|
||||
proc `'wrap`(number: string): string = "[[" & number & "]]"
|
||||
doAssert lispReprStr(-1'wrap) == """(DotExpr (RStrLit "-1") (Ident "\'wrap"))"""
|
||||
|
||||
template main =
|
||||
block: # basic suffix usage
|
||||
template `'twrap`(number: string): untyped =
|
||||
number.`'wrap`
|
||||
proc extraContext(): string =
|
||||
22.40'wrap
|
||||
proc `*`(left, right: string): string =
|
||||
result = left & "times" & right
|
||||
proc `+`(left, right: string): string =
|
||||
result = left & "plus" & right
|
||||
|
||||
doAssert 1'wrap == "[[1]]"
|
||||
doAssert -1'wrap == "[[-1]]":
|
||||
"unable to resolve a negative integer-suffix pattern"
|
||||
doAssert 12345.67890'wrap == "[[12345.67890]]"
|
||||
doAssert 1'wrap*1'wrap == "[[1]]times[[1]]":
|
||||
"unable to resolve an operator between two suffixed numeric literals"
|
||||
doAssert 1'wrap+ -1'wrap == "[[1]]plus[[-1]]": # will generate a compiler warning about inconsistent spacing
|
||||
"unable to resolve a negative suffixed numeric literal following an operator"
|
||||
doAssert 1'wrap + -1'wrap == "[[1]]plus[[-1]]"
|
||||
doAssert 1'twrap == "[[1]]"
|
||||
doAssert extraContext() == "[[22.40]]":
|
||||
"unable to return a suffixed numeric literal by an implicit return"
|
||||
doAssert 0x5a3a'wrap == "[[0x5a3a]]"
|
||||
doAssert 0o5732'wrap == "[[0o5732]]"
|
||||
doAssert 0b0101111010101'wrap == "[[0b0101111010101]]"
|
||||
doAssert -38383839292839283928392839283928392839283.928493849385935898243e-50000'wrap == "[[-38383839292839283928392839283928392839283.928493849385935898243e-50000]]"
|
||||
doAssert 1234.56'wrap == "[[1234.56]]":
|
||||
"unable to properly account for context with suffixed numeric literals"
|
||||
|
||||
block: # verify that the i64, f32, etc builtin suffixes still parse correctly
|
||||
const expectedF32: float32 = 123.125
|
||||
proc `'f9`(number: string): string = # proc starts with 'f' just like 'f32'
|
||||
"[[" & number & "]]"
|
||||
proc `'f32a`(number: string): string = # looks even more like 'f32'
|
||||
"[[" & number & "]]"
|
||||
proc `'d9`(number: string): string = # proc starts with 'd' just like the d suffix
|
||||
"[[" & number & "]]"
|
||||
proc `'i9`(number: string): string = # proc starts with 'i' just like 'i64'
|
||||
"[[" & number & "]]"
|
||||
proc `'u9`(number: string): string = # proc starts with 'u' just like 'u8'
|
||||
"[[" & number & "]]"
|
||||
|
||||
doAssert 123.125f32 == expectedF32:
|
||||
"failing to support non-quoted legacy f32 floating point suffix"
|
||||
doAssert 123.125'f32 == expectedF32
|
||||
doAssert 123.125e0'f32 == expectedF32
|
||||
doAssert 1234.56'wrap == 1234.56'f9
|
||||
doAssert 1234.56'wrap == 1234.56'f32a
|
||||
doAssert 1234.56'wrap == 1234.56'd9
|
||||
doAssert 1234.56'wrap == 1234.56'i9
|
||||
doAssert 1234.56'wrap == 1234.56'u9
|
||||
doAssert lispReprStr(1234.56'u9) == """(DotExpr (RStrLit "1234.56") (Ident "\'u9"))""":
|
||||
"failed to properly build AST for suffix that starts with u"
|
||||
doAssert -128'i8 == (-128).int8
|
||||
|
||||
block: # case checks
|
||||
doAssert 1E2 == 100:
|
||||
"lexer not handling upper-case exponent"
|
||||
doAssert 1.0E2 == 100.0
|
||||
doAssert 1e2 == 100
|
||||
doAssert 0xdeadBEEF'wrap == "[[0xdeadBEEF]]":
|
||||
"lexer not maintaining original case"
|
||||
doAssert 0.1E12'wrap == "[[0.1E12]]"
|
||||
doAssert 0.0e12'wrap == "[[0.0e12]]"
|
||||
doAssert 0.0e+12'wrap == "[[0.0e+12]]"
|
||||
doAssert 0.0e-12'wrap == "[[0.0e-12]]"
|
||||
doAssert 0e-12'wrap == "[[0e-12]]"
|
||||
|
||||
block: # macro and template usage
|
||||
template `'foo`(a: string): untyped = (a, 2)
|
||||
doAssert -12'foo == ("-12", 2)
|
||||
template `'fooplus`(a: string, b: int): untyped = (a, b)
|
||||
doAssert -12'fooplus(2) == ("-12", 2)
|
||||
template `'fooplusopt`(a: string, b: int = 99): untyped = (a, b)
|
||||
doAssert -12'fooplusopt(2) == ("-12", 2)
|
||||
doAssert -12'fooplusopt() == ("-12", 99)
|
||||
doAssert -12'fooplusopt == ("-12", 99)
|
||||
macro `'bar`(a: static string): untyped =
|
||||
var infix = newNimNode(nnkInfix)
|
||||
infix.add newIdentNode("&")
|
||||
infix.add newLit("got ")
|
||||
infix.add newLit(a.repr)
|
||||
result = newNimNode(nnkStmtList)
|
||||
result.add infix
|
||||
doAssert -12'bar == "got \"-12\""
|
||||
macro deb(a): untyped = newLit(a.repr)
|
||||
doAssert deb(-12'bar) == "-12'bar"
|
||||
# macro metawrap(): untyped =
|
||||
# func wrap1(a: string): string = "{" & a & "}"
|
||||
# func `'wrap2`(a: string): string = "{" & a & "}"
|
||||
# result = quote do:
|
||||
# let a1 = wrap1"-128"
|
||||
# let a2 = -128'wrap2
|
||||
# metawrap()
|
||||
# doAssert a1 == "{-128}"
|
||||
# doAssert a2 == "{-128}"
|
||||
|
||||
static: main()
|
||||
main()
|
||||
@@ -1,19 +0,0 @@
|
||||
discard """
|
||||
output: "a\"\"long string\"\"\"\"\"abc\"def_'2'●𝌆𝌆A"
|
||||
"""
|
||||
# Test the new different string literals
|
||||
|
||||
const
|
||||
tripleEmpty = """"long string"""""""" # "long string """""
|
||||
|
||||
rawQuote = r"a"""
|
||||
|
||||
raw = r"abc""def"
|
||||
|
||||
escaped = "\x5f'\50'\u25cf\u{1D306}\u{1d306}\u{41}"
|
||||
|
||||
|
||||
stdout.write(rawQuote)
|
||||
stdout.write(tripleEmpty)
|
||||
stdout.write(raw)
|
||||
stdout.writeLine(escaped)
|
||||
@@ -6,13 +6,7 @@ discard """
|
||||
|
||||
import std/[macros, strutils]
|
||||
|
||||
macro lispReprStr*(a: untyped): untyped = newLit(a.lispRepr)
|
||||
|
||||
macro assertAST*(expected: string, struct: untyped): untyped =
|
||||
var ast = newLit(struct.treeRepr)
|
||||
result = quote do:
|
||||
if `ast` != `expected`:
|
||||
doAssert false, "\nGot:\n" & `ast`.indent(2) & "\nExpected:\n" & `expected`.indent(2)
|
||||
import mlexerutils
|
||||
|
||||
const one = 1
|
||||
const minusOne = `-`(one)
|
||||
|
||||
Reference in New Issue
Block a user