custom integer literals (#17489)

* user defined integer literals; refs #17020
* updated renderer.nim
* use mlexerutils helper
* imported all test cases from https://github.com/nim-lang/Nim/pull/17020
* final grammar updated
This commit is contained in:
Andreas Rumpf
2021-03-24 14:46:19 +01:00
committed by GitHub
parent 7366a3da37
commit 5f5a92379f
12 changed files with 409 additions and 218 deletions

View File

@@ -266,6 +266,8 @@
- The unary minus in `-1` is now part of the integer literal; it is now parsed as a single token.
This implies that edge cases like `-128'i8` finally work correctly.
- Custom numeric literals are now supported.
## Compiler changes

View File

@@ -416,7 +416,7 @@ proc nodeToHighlightedHtml(d: PDoc; n: PNode; result: var Rope; renderFlags: TRe
of tkOpr:
dispA(d.conf, result, "<span class=\"Operator\">$1</span>", "\\spanOperator{$1}",
[escLit])
of tkStrLit..tkTripleStrLit:
of tkStrLit..tkTripleStrLit, tkCustomLit:
dispA(d.conf, result, "<span class=\"StringLit\">$1</span>",
"\\spanStringLit{$1}", [escLit])
of tkCharLit:

View File

@@ -60,6 +60,7 @@ type
tkFloat64Lit = "tkFloat64Lit", tkFloat128Lit = "tkFloat128Lit",
tkStrLit = "tkStrLit", tkRStrLit = "tkRStrLit", tkTripleStrLit = "tkTripleStrLit",
tkGStrLit = "tkGStrLit", tkGTripleStrLit = "tkGTripleStrLit", tkCharLit = "tkCharLit",
tkCustomLit = "tkCustomLit",
tkParLe = "(", tkParRi = ")", tkBracketLe = "[",
tkBracketRi = "]", tkCurlyLe = "{", tkCurlyRi = "}",
@@ -313,8 +314,7 @@ proc getNumber(L: var Lexer, result: var Token) =
proc lexMessageLitNum(L: var Lexer, msg: string, startpos: int, msgKind = errGenerated) =
# Used to get slightly human friendlier err messages.
const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O',
'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'}
const literalishChars = {'A'..'Z', 'a'..'z', '0'..'9', '_', '.', '\''}
var msgPos = L.bufpos
var t: Token
t.literal = ""
@@ -326,15 +326,14 @@ proc getNumber(L: var Lexer, result: var Token) =
t.literal.add(L.buf[L.bufpos])
inc(L.bufpos)
matchChars(L, t, literalishChars)
if L.buf[L.bufpos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
inc(L.bufpos)
if L.buf[L.bufpos] in literalishChars:
t.literal.add(L.buf[L.bufpos])
inc(L.bufpos)
matchChars(L, t, {'0'..'9'})
L.bufpos = msgPos
lexMessage(L, msgKind, msg % t.literal)
var
startpos, endpos: int
xi: BiggestInt
isBase10 = true
numDigits = 0
@@ -346,7 +345,7 @@ proc getNumber(L: var Lexer, result: var Token) =
result.tokType = tkIntLit # int literal until we know better
result.literal = ""
result.base = base10
startpos = L.bufpos
let startpos = L.bufpos
tokenBegin(result, startpos)
var isPositive = true
@@ -395,201 +394,187 @@ proc getNumber(L: var Lexer, result: var Token) =
discard matchUnderscoreChars(L, result, {'0'..'9'})
if L.buf[L.bufpos] in {'e', 'E'}:
result.tokType = tkFloatLit
eatChar(L, result, 'e')
eatChar(L, result)
if L.buf[L.bufpos] in {'+', '-'}:
eatChar(L, result)
discard matchUnderscoreChars(L, result, {'0'..'9'})
endpos = L.bufpos
let endpos = L.bufpos
# Second stage, find out if there's a datatype suffix and handle it
var postPos = endpos
if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
let errPos = postPos
var customLitPossible = false
if L.buf[postPos] == '\'':
inc(postPos)
customLitPossible = true
case L.buf[postPos]
of 'f', 'F':
inc(postPos)
if (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
result.tokType = tkFloat32Lit
inc(postPos, 2)
elif (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
result.tokType = tkFloat64Lit
inc(postPos, 2)
elif (L.buf[postPos] == '1') and
(L.buf[postPos + 1] == '2') and
(L.buf[postPos + 2] == '8'):
result.tokType = tkFloat128Lit
inc(postPos, 3)
else: # "f" alone defaults to float32
result.tokType = tkFloat32Lit
of 'd', 'D': # ad hoc convenience shortcut for f64
inc(postPos)
result.tokType = tkFloat64Lit
of 'i', 'I':
inc(postPos)
if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
result.tokType = tkInt64Lit
inc(postPos, 2)
elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
result.tokType = tkInt32Lit
inc(postPos, 2)
elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
result.tokType = tkInt16Lit
inc(postPos, 2)
elif (L.buf[postPos] == '8'):
result.tokType = tkInt8Lit
inc(postPos)
if L.buf[postPos] in SymChars:
var suffixAsLower = newStringOfCap(10)
var suffix = newStringOfCap(10)
while true:
let c = L.buf[postPos]
suffix.add c
suffixAsLower.add toLowerAscii(c)
inc postPos
if L.buf[postPos] notin SymChars+{'_'}: break
case suffix
of "f", "f32": result.tokType = tkFloat32Lit
of "d", "f64": result.tokType = tkFloat64Lit
of "f128": result.tokType = tkFloat128Lit
of "i8": result.tokType = tkInt8Lit
of "i16": result.tokType = tkInt16Lit
of "i32": result.tokType = tkInt32Lit
of "i64": result.tokType = tkInt64Lit
of "u": result.tokType = tkUIntLit
of "u8": result.tokType = tkUInt8Lit
of "u16": result.tokType = tkUInt16Lit
of "u32": result.tokType = tkUInt32Lit
of "u64": result.tokType = tkUInt64Lit
else:
lexMessageLitNum(L, "invalid number: '$1'", startpos)
of 'u', 'U':
inc(postPos)
if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
result.tokType = tkUInt64Lit
inc(postPos, 2)
elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
result.tokType = tkUInt32Lit
inc(postPos, 2)
elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
result.tokType = tkUInt16Lit
inc(postPos, 2)
elif (L.buf[postPos] == '8'):
result.tokType = tkUInt8Lit
inc(postPos)
else:
result.tokType = tkUIntLit
if customLitPossible:
# remember the position of the ``'`` so that the parser doesn't
# have to reparse the custom literal:
result.iNumber = len(result.literal)
result.literal.add '\''
result.literal.add suffix
result.tokType = tkCustomLit
else:
lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
else:
lexMessageLitNum(L, "invalid number: '$1'", startpos)
lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
# Is there still a literalish char awaiting? Then it's an error!
if L.buf[postPos] in literalishChars or
(L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}):
lexMessageLitNum(L, "invalid number: '$1'", startpos)
# Third stage, extract actual number
L.bufpos = startpos # restore position
var pos = startpos
try:
if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
inc(pos, 2)
xi = 0 # it is a base prefix
if result.tokType != tkCustomLit:
# Third stage, extract actual number
L.bufpos = startpos # restore position
var pos = startpos
try:
if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
inc(pos, 2)
xi = 0 # it is a base prefix
case L.buf[pos - 1]
of 'b', 'B':
result.base = base2
while pos < endpos:
if L.buf[pos] != '_':
xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
inc(pos)
# 'c', 'C' is deprecated
of 'o', 'c', 'C':
result.base = base8
while pos < endpos:
if L.buf[pos] != '_':
xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
inc(pos)
of 'x', 'X':
result.base = base16
while pos < endpos:
case L.buf[pos]
of '_':
case L.buf[pos - 1]
of 'b', 'B':
result.base = base2
while pos < endpos:
if L.buf[pos] != '_':
xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
inc(pos)
of '0'..'9':
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
# 'c', 'C' is deprecated
of 'o', 'c', 'C':
result.base = base8
while pos < endpos:
if L.buf[pos] != '_':
xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
inc(pos)
of 'a'..'f':
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
inc(pos)
of 'A'..'F':
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
inc(pos)
else:
break
of 'x', 'X':
result.base = base16
while pos < endpos:
case L.buf[pos]
of '_':
inc(pos)
of '0'..'9':
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
inc(pos)
of 'a'..'f':
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
inc(pos)
of 'A'..'F':
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
inc(pos)
else:
break
else:
internalError(L.config, getLineInfo(L), "getNumber")
case result.tokType
of tkIntLit, tkInt64Lit: setNumber result.iNumber, xi
of tkInt8Lit: setNumber result.iNumber, ashr(xi shl 56, 56)
of tkInt16Lit: setNumber result.iNumber, ashr(xi shl 48, 48)
of tkInt32Lit: setNumber result.iNumber, ashr(xi shl 32, 32)
of tkUIntLit, tkUInt64Lit: setNumber result.iNumber, xi
of tkUInt8Lit: setNumber result.iNumber, xi and 0xff
of tkUInt16Lit: setNumber result.iNumber, xi and 0xffff
of tkUInt32Lit: setNumber result.iNumber, xi and 0xffffffff
of tkFloat32Lit:
setNumber result.fNumber, (cast[PFloat32](addr(xi)))[]
# note: this code is endian neutral!
# XXX: Test this on big endian machine!
of tkFloat64Lit, tkFloatLit:
setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]
else: internalError(L.config, getLineInfo(L), "getNumber")
# Bounds checks. Non decimal literals are allowed to overflow the range of
# the datatype as long as their pattern don't overflow _bitwise_, hence
# below checks of signed sizes against uint*.high is deliberate:
# (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
if result.tokType notin floatTypes:
let outOfRange =
case result.tokType
of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
of tkInt8Lit: (xi > BiggestInt(uint8.high))
of tkInt16Lit: (xi > BiggestInt(uint16.high))
of tkInt32Lit: (xi > BiggestInt(uint32.high))
else: false
if outOfRange:
#echo "out of range num: ", result.iNumber, " vs ", xi
lexMessageLitNum(L, "number out of range: '$1'", startpos)
else:
internalError(L.config, getLineInfo(L), "getNumber")
case result.tokType
of floatTypes:
result.fNumber = parseFloat(result.literal)
of tkUInt64Lit, tkUIntLit:
var iNumber: uint64
var len: int
try:
len = parseBiggestUInt(result.literal, iNumber)
except ValueError:
raise newException(OverflowDefect, "number out of range: " & $result.literal)
if len != result.literal.len:
raise newException(ValueError, "invalid integer: " & $result.literal)
result.iNumber = cast[int64](iNumber)
else:
var iNumber: int64
var len: int
try:
len = parseBiggestInt(result.literal, iNumber)
except ValueError:
raise newException(OverflowDefect, "number out of range: " & $result.literal)
if len != result.literal.len:
raise newException(ValueError, "invalid integer: " & $result.literal)
result.iNumber = iNumber
case result.tokType
of tkIntLit, tkInt64Lit: setNumber result.iNumber, xi
of tkInt8Lit: setNumber result.iNumber, ashr(xi shl 56, 56)
of tkInt16Lit: setNumber result.iNumber, ashr(xi shl 48, 48)
of tkInt32Lit: setNumber result.iNumber, ashr(xi shl 32, 32)
of tkUIntLit, tkUInt64Lit: setNumber result.iNumber, xi
of tkUInt8Lit: setNumber result.iNumber, xi and 0xff
of tkUInt16Lit: setNumber result.iNumber, xi and 0xffff
of tkUInt32Lit: setNumber result.iNumber, xi and 0xffffffff
of tkFloat32Lit:
setNumber result.fNumber, (cast[PFloat32](addr(xi)))[]
# note: this code is endian neutral!
# XXX: Test this on big endian machine!
of tkFloat64Lit, tkFloatLit:
setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]
else: internalError(L.config, getLineInfo(L), "getNumber")
# Bounds checks. Non decimal literals are allowed to overflow the range of
# the datatype as long as their pattern don't overflow _bitwise_, hence
# below checks of signed sizes against uint*.high is deliberate:
# (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
if result.tokType notin floatTypes:
# Explicit bounds checks.
let outOfRange =
case result.tokType
of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
of tkInt8Lit: (xi > BiggestInt(uint8.high))
of tkInt16Lit: (xi > BiggestInt(uint16.high))
of tkInt32Lit: (xi > BiggestInt(uint32.high))
of tkInt8Lit: result.iNumber > int8.high or result.iNumber < int8.low
of tkUInt8Lit: result.iNumber > BiggestInt(uint8.high) or result.iNumber < 0
of tkInt16Lit: result.iNumber > int16.high or result.iNumber < int16.low
of tkUInt16Lit: result.iNumber > BiggestInt(uint16.high) or result.iNumber < 0
of tkInt32Lit: result.iNumber > int32.high or result.iNumber < int32.low
of tkUInt32Lit: result.iNumber > BiggestInt(uint32.high) or result.iNumber < 0
else: false
if outOfRange:
#echo "out of range num: ", result.iNumber, " vs ", xi
lexMessageLitNum(L, "number out of range: '$1'", startpos)
else:
case result.tokType
of floatTypes:
result.fNumber = parseFloat(result.literal)
of tkUInt64Lit, tkUIntLit:
var iNumber: uint64
var len: int
try:
len = parseBiggestUInt(result.literal, iNumber)
except ValueError:
raise newException(OverflowDefect, "number out of range: " & $result.literal)
if len != result.literal.len:
raise newException(ValueError, "invalid integer: " & $result.literal)
result.iNumber = cast[int64](iNumber)
else:
var iNumber: int64
var len: int
try:
len = parseBiggestInt(result.literal, iNumber)
except ValueError:
raise newException(OverflowDefect, "number out of range: " & $result.literal)
if len != result.literal.len:
raise newException(ValueError, "invalid integer: " & $result.literal)
result.iNumber = iNumber
# Promote int literal to int64? Not always necessary, but more consistent
if result.tokType == tkIntLit:
if result.iNumber > high(int32) or result.iNumber < low(int32):
result.tokType = tkInt64Lit
# Explicit bounds checks.
let outOfRange =
case result.tokType
of tkInt8Lit: result.iNumber > int8.high or result.iNumber < int8.low
of tkUInt8Lit: result.iNumber > BiggestInt(uint8.high) or result.iNumber < 0
of tkInt16Lit: result.iNumber > int16.high or result.iNumber < int16.low
of tkUInt16Lit: result.iNumber > BiggestInt(uint16.high) or result.iNumber < 0
of tkInt32Lit: result.iNumber > int32.high or result.iNumber < int32.low
of tkUInt32Lit: result.iNumber > BiggestInt(uint32.high) or result.iNumber < 0
else: false
if outOfRange:
lexMessageLitNum(L, "number out of range: '$1'", startpos)
# Promote int literal to int64? Not always necessary, but more consistent
if result.tokType == tkIntLit:
if result.iNumber > high(int32) or result.iNumber < low(int32):
result.tokType = tkInt64Lit
except ValueError:
lexMessageLitNum(L, "invalid number: '$1'", startpos)
except OverflowDefect, RangeDefect:
lexMessageLitNum(L, "number out of range: '$1'", startpos)
except ValueError:
lexMessageLitNum(L, "invalid number: '$1'", startpos)
except OverflowDefect, RangeDefect:
lexMessageLitNum(L, "number out of range: '$1'", startpos)
tokenEnd(result, postPos-1)
L.bufpos = postPos
@@ -830,8 +815,9 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) =
inc(pos)
L.bufpos = pos
proc getCharacter(L: var Lexer, tok: var Token) =
proc getCharacter(L: var Lexer; tok: var Token) =
tokenBegin(tok, L.bufpos)
let startPos = L.bufpos
inc(L.bufpos) # skip '
var c = L.buf[L.bufpos]
case c
@@ -842,10 +828,16 @@ proc getCharacter(L: var Lexer, tok: var Token) =
else:
tok.literal = $c
inc(L.bufpos)
if L.buf[L.bufpos] != '\'':
lexMessage(L, errGenerated, "missing closing ' for character literal")
tokenEndIgnore(tok, L.bufpos)
inc(L.bufpos) # skip '
if L.buf[L.bufpos] == '\'':
tokenEndIgnore(tok, L.bufpos)
inc(L.bufpos) # skip '
else:
if startPos > 0 and L.buf[startPos-1] == '`':
tok.literal = "'"
L.bufpos = startPos+1
else:
lexMessage(L, errGenerated, "missing closing ' for character literal")
tokenEndIgnore(tok, L.bufpos)
proc getSymbol(L: var Lexer, tok: var Token) =
var h: Hash = 0

View File

@@ -355,7 +355,7 @@ proc parseSymbol(p: var Parser, mode = smNormal): PNode =
let node = newNodeI(nkIdent, lineinfo)
node.ident = p.lex.cache.getIdent(accm)
result.add(node)
of tokKeywordLow..tokKeywordHigh, tkSymbol, tkIntLit..tkCharLit:
of tokKeywordLow..tokKeywordHigh, tkSymbol, tkIntLit..tkCustomLit:
result.add(newIdentNodeP(p.lex.cache.getIdent($p.tok), p))
getTok(p)
else:
@@ -627,7 +627,7 @@ proc identOrLiteral(p: var Parser, mode: PrimaryMode): PNode =
#| | UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
#| | FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
#| | STR_LIT | RSTR_LIT | TRIPLESTR_LIT
#| | CHAR_LIT
#| | CHAR_LIT | CUSTOM_NUMERIC_LIT
#| | NIL
#| generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
#| identOrLiteral = generalizedLit | symbol | literal
@@ -710,6 +710,14 @@ proc identOrLiteral(p: var Parser, mode: PrimaryMode): PNode =
of tkCharLit:
result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p)
getTok(p)
of tkCustomLit:
let splitPos = p.tok.iNumber.int
let str = newStrNodeP(nkRStrLit, p.tok.literal.substr(0, splitPos-1), p)
let callee = newIdentNodeP(getIdent(p.lex.cache, p.tok.literal.substr(splitPos)), p)
result = newNodeP(nkDotExpr, p)
result.add str
result.add callee
getTok(p)
of tkNil:
result = newNodeP(nkNilLit, p)
getTok(p)
@@ -807,7 +815,7 @@ proc primarySuffix(p: var Parser, r: PNode,
result = commandExpr(p, result, mode)
break
result = namedParams(p, result, nkCurlyExpr, tkCurlyRi)
of tkSymbol, tkAccent, tkIntLit..tkCharLit, tkNil, tkCast,
of tkSymbol, tkAccent, tkIntLit..tkCustomLit, tkNil, tkCast,
tkOpr, tkDotDot, tkVar, tkOut, tkStatic, tkType, tkEnum, tkTuple,
tkObject, tkProc:
# XXX: In type sections we allow the free application of the
@@ -1097,7 +1105,7 @@ proc isExprStart(p: Parser): bool =
case p.tok.tokType
of tkSymbol, tkAccent, tkOpr, tkNot, tkNil, tkCast, tkIf, tkFor,
tkProc, tkFunc, tkIterator, tkBind, tkBuiltInMagics,
tkParLe, tkBracketLe, tkCurlyLe, tkIntLit..tkCharLit, tkVar, tkRef, tkPtr,
tkParLe, tkBracketLe, tkCurlyLe, tkIntLit..tkCustomLit, tkVar, tkRef, tkPtr,
tkTuple, tkObject, tkWhen, tkCase, tkOut:
result = true
else: result = false
@@ -1498,7 +1506,7 @@ proc parseReturnOrRaise(p: var Parser, kind: TNodeKind): PNode =
#| yieldStmt = 'yield' optInd expr?
#| discardStmt = 'discard' optInd expr?
#| breakStmt = 'break' optInd expr?
#| continueStmt = 'break' optInd expr?
#| continueStmt = 'continue' optInd expr?
result = newNodeP(kind, p)
getTok(p)
if p.tok.tokType == tkComment:

View File

@@ -942,7 +942,7 @@ proc skipHiddenNodes(n: PNode): PNode =
else: break
proc accentedName(g: var TSrcGen, n: PNode) =
const backticksNeeded = OpChars + {'[', '{'}
const backticksNeeded = OpChars + {'[', '{', '\''}
if n == nil: return
let isOperator =
if n.kind == nkIdent and n.ident.s.len > 0 and n.ident.s[0] in backticksNeeded: true
@@ -976,6 +976,11 @@ proc infixArgument(g: var TSrcGen, n: PNode, i: int) =
if needsParenthesis:
put(g, tkParRi, ")")
proc isCustomLit(n: PNode): bool =
  ## Reports whether `n` has the shape of a custom numeric literal: exactly
  ## two children, the first a raw string literal (`nkRStrLit`) and the second
  ## an identifier or symbol whose name starts with `'`.
  # Check the shape first: `and` binds tighter than `or`, so folding this
  # guard into one flat boolean expression would let the `nkSym` alternative
  # index `n[1]` without the length and `nkRStrLit` checks applying to it.
  if n.len == 2 and n[0].kind == nkRStrLit:
    let callee = n[1]
    result =
      (callee.kind == nkIdent and callee.ident.s.startsWith('\'')) or
      (callee.kind == nkSym and callee.sym.name.s.startsWith('\''))
proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
if isNil(n): return
var
@@ -1195,9 +1200,14 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
gcomma(g, n, c)
put(g, tkBracketRi, "]")
of nkDotExpr:
gsub(g, n, 0)
put(g, tkDot, ".")
gsub(g, n, 1)
if isCustomLit(n):
put(g, tkCustomLit, n[0].strVal)
gsub(g, n, 1)
else:
gsub(g, n, 0)
put(g, tkDot, ".")
if n.len > 1:
accentedName(g, n[1])
of nkBind:
putWithSpace(g, tkBind, "bind")
gsub(g, n, 0)

View File

@@ -1524,7 +1524,7 @@ proc semProcAnnotation(c: PContext, prc: PNode;
return
proc semInferredLambda(c: PContext, pt: TIdTable, n: PNode): PNode {.nosinks.} =
## used for resolving 'auto' in lambdas based on their callsite
## used for resolving 'auto' in lambdas based on their callsite
var n = n
let original = n[namePos].sym
let s = original #copySym(original, false)

View File

@@ -46,7 +46,7 @@ literal = | INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT
| UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
| FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
| STR_LIT | RSTR_LIT | TRIPLESTR_LIT
| CHAR_LIT
| CHAR_LIT | CUSTOM_NUMERIC_LIT
| NIL
generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
identOrLiteral = generalizedLit | symbol | literal
@@ -100,6 +100,7 @@ postExprBlocks = ':' stmt? ( IND{=} doBlock
| IND{=} 'of' exprList ':' stmt
| IND{=} 'elif' expr ':' stmt
| IND{=} 'except' exprList ':' stmt
| IND{=} 'finally' ':' stmt
| IND{=} 'else' ':' stmt )*
exprStmt = simpleExpr
(( '=' optInd expr colonBody? )

View File

@@ -490,11 +490,17 @@ this. Another reason is that Nim can thus support `array[char, int]` or
type is used for Unicode characters, it can represent any Unicode character.
`Rune` is declared in the `unicode module <unicode.html>`_.
A character literal that does not end in ``'`` is interpreted as ``'`` if there
is a preceding backtick token. There must be no whitespace between the preceding
backtick token and the character literal. This special rule ensures that a declaration
like ``proc `'customLiteral`(s: string)`` is valid. See also
`Custom Numeric Literals <#custom-numeric-literals>`_.
Numerical constants
-------------------
Numerical constants are of a single type and have the form::
Numeric Literals
----------------
Numeric literals have the form::
hexdigit = digit | 'A'..'F' | 'a'..'f'
octdigit = '0'..'7'
@@ -530,8 +536,10 @@ Numerical constants are of a single type and have the form::
FLOAT64_LIT = HEX_LIT '\'' FLOAT64_SUFFIX
| (FLOAT_LIT | DEC_LIT | OCT_LIT | BIN_LIT) ['\''] FLOAT64_SUFFIX
CUSTOM_NUMERIC_LIT = (FLOAT_LIT | HEX_LIT | DEC_LIT | OCT_LIT | BIN_LIT) '\'' CUSTOM_NUMERIC_SUFFIX
As can be seen in the productions, numerical constants can contain underscores
As can be seen in the productions, numeric literals can contain underscores
for readability. Integer and floating-point literals may be given in decimal (no
prefix), binary (prefix `0b`), octal (prefix `0o`), and hexadecimal
(prefix `0x`) notation.
@@ -579,7 +587,7 @@ is optional if it is not ambiguous (only hexadecimal floating-point literals
with a type suffix can be ambiguous).
The type suffixes are:
The pre-defined type suffixes are:
================= =========================
Type Suffix Resulting type of literal
@@ -611,6 +619,42 @@ the bit width of the datatype, it is accepted.
Hence: 0b10000000'u8 == 0x80'u8 == 128, but, 0b10000000'i8 == 0x80'i8 == -128
instead of causing an overflow error.
Custom Numeric Literals
~~~~~~~~~~~~~~~~~~~~~~~
If the suffix is not predefined, then the suffix is assumed to be a call
to a proc, template, macro or other callable identifier that is passed the
string containing the literal. The callable identifier needs to be declared
with a special ``'`` prefix:
.. code-block:: nim
import strutils
type u4 = distinct uint8 # a 4-bit unsigned integer aka "nibble"
proc `'u4`(n: string): u4 =
# The leading ' is required.
result = (parseInt(n) and 0x0F).u4
var x = 5'u4
More formally, a custom numeric literal `123'custom` is transformed
to r"123".`'custom` in the parsing step. There is no AST node kind that
corresponds to this transformation. The transformation naturally handles
the case that additional parameters are passed to the callee:
.. code-block:: nim
import strutils
type u4 = distinct uint8 # a 4-bit unsigned integer aka "nibble"
proc `'u4`(n: string; moreData: int): u4 =
result = (parseInt(n) and 0x0F).u4
var x = 5'u4(123)
Custom numeric literals are covered by the grammar rule named `CUSTOM_NUMERIC_LIT`.
Operators
---------

View File

@@ -0,0 +1,9 @@
import macros
macro lispReprStr*(a: untyped): untyped =
  ## Expands to a string literal holding the `lispRepr` of the AST passed
  ## as `a`.
  result = newLit(lispRepr(a))
macro assertAST*(expected: string, struct: untyped): untyped =
  ## Compares the `treeRepr` of `struct` with `expected` and fails via
  ## `doAssert` with a "Got / Expected" report when they differ.
  ## `indent` is not imported here, so it must be resolvable where the
  ## macro is expanded.
  let actual = newLit(treeRepr(struct))
  result = quote do:
    if `actual` != `expected`:
      doAssert false, "\nGot:\n" & `actual`.indent(2) & "\nExpected:\n" & `expected`.indent(2)

View File

@@ -0,0 +1,150 @@
discard """
targets: "c cpp js"
"""
# Test custom numeric literals (tkCustomLit)
import std/[macros, strutils]
import mlexerutils
# AST checks
assertAST dedent """
StmtList
ProcDef
AccQuoted
Ident "\'"
Ident "wrap"
Empty
Empty
FormalParams
Ident "string"
IdentDefs
Ident "number"
Ident "string"
Empty
Empty
Empty
StmtList
Asgn
Ident "result"
Infix
Ident "&"
Infix
Ident "&"
StrLit "[["
Ident "number"
StrLit "]]"""":
proc `'wrap`(number: string): string =
result = "[[" & number & "]]"
assertAST dedent """
StmtList
DotExpr
RStrLit "-38383839292839283928392839283928392839283.928493849385935898243e-50000"
Ident "\'wrap"""":
-38383839292839283928392839283928392839283.928493849385935898243e-50000'wrap
proc `'wrap`(number: string): string = "[[" & number & "]]"
doAssert lispReprStr(-1'wrap) == """(DotExpr (RStrLit "-1") (Ident "\'wrap"))"""
template main =
block: # basic suffix usage
template `'twrap`(number: string): untyped =
number.`'wrap`
proc extraContext(): string =
22.40'wrap
proc `*`(left, right: string): string =
result = left & "times" & right
proc `+`(left, right: string): string =
result = left & "plus" & right
doAssert 1'wrap == "[[1]]"
doAssert -1'wrap == "[[-1]]":
"unable to resolve a negative integer-suffix pattern"
doAssert 12345.67890'wrap == "[[12345.67890]]"
doAssert 1'wrap*1'wrap == "[[1]]times[[1]]":
"unable to resolve an operator between two suffixed numeric literals"
doAssert 1'wrap+ -1'wrap == "[[1]]plus[[-1]]": # will generate a compiler warning about inconsistent spacing
"unable to resolve a negative suffixed numeric literal following an operator"
doAssert 1'wrap + -1'wrap == "[[1]]plus[[-1]]"
doAssert 1'twrap == "[[1]]"
doAssert extraContext() == "[[22.40]]":
"unable to return a suffixed numeric literal by an implicit return"
doAssert 0x5a3a'wrap == "[[0x5a3a]]"
doAssert 0o5732'wrap == "[[0o5732]]"
doAssert 0b0101111010101'wrap == "[[0b0101111010101]]"
doAssert -38383839292839283928392839283928392839283.928493849385935898243e-50000'wrap == "[[-38383839292839283928392839283928392839283.928493849385935898243e-50000]]"
doAssert 1234.56'wrap == "[[1234.56]]":
"unable to properly account for context with suffixed numeric literals"
block: # verify that the i64, f32, etc builtin suffixes still parse correctly
const expectedF32: float32 = 123.125
proc `'f9`(number: string): string = # proc starts with 'f' just like 'f32'
"[[" & number & "]]"
proc `'f32a`(number: string): string = # looks even more like 'f32'
"[[" & number & "]]"
proc `'d9`(number: string): string = # proc starts with 'd' just like the d suffix
"[[" & number & "]]"
proc `'i9`(number: string): string = # proc starts with 'i' just like 'i64'
"[[" & number & "]]"
proc `'u9`(number: string): string = # proc starts with 'u' just like 'u8'
"[[" & number & "]]"
doAssert 123.125f32 == expectedF32:
"failing to support non-quoted legacy f32 floating point suffix"
doAssert 123.125'f32 == expectedF32
doAssert 123.125e0'f32 == expectedF32
doAssert 1234.56'wrap == 1234.56'f9
doAssert 1234.56'wrap == 1234.56'f32a
doAssert 1234.56'wrap == 1234.56'd9
doAssert 1234.56'wrap == 1234.56'i9
doAssert 1234.56'wrap == 1234.56'u9
doAssert lispReprStr(1234.56'u9) == """(DotExpr (RStrLit "1234.56") (Ident "\'u9"))""":
"failed to properly build AST for suffix that starts with u"
doAssert -128'i8 == (-128).int8
block: # case checks
doAssert 1E2 == 100:
"lexer not handling upper-case exponent"
doAssert 1.0E2 == 100.0
doAssert 1e2 == 100
doAssert 0xdeadBEEF'wrap == "[[0xdeadBEEF]]":
"lexer not maintaining original case"
doAssert 0.1E12'wrap == "[[0.1E12]]"
doAssert 0.0e12'wrap == "[[0.0e12]]"
doAssert 0.0e+12'wrap == "[[0.0e+12]]"
doAssert 0.0e-12'wrap == "[[0.0e-12]]"
doAssert 0e-12'wrap == "[[0e-12]]"
block: # macro and template usage
template `'foo`(a: string): untyped = (a, 2)
doAssert -12'foo == ("-12", 2)
template `'fooplus`(a: string, b: int): untyped = (a, b)
doAssert -12'fooplus(2) == ("-12", 2)
template `'fooplusopt`(a: string, b: int = 99): untyped = (a, b)
doAssert -12'fooplusopt(2) == ("-12", 2)
doAssert -12'fooplusopt() == ("-12", 99)
doAssert -12'fooplusopt == ("-12", 99)
macro `'bar`(a: static string): untyped =
var infix = newNimNode(nnkInfix)
infix.add newIdentNode("&")
infix.add newLit("got ")
infix.add newLit(a.repr)
result = newNimNode(nnkStmtList)
result.add infix
doAssert -12'bar == "got \"-12\""
macro deb(a): untyped = newLit(a.repr)
doAssert deb(-12'bar) == "-12'bar"
# macro metawrap(): untyped =
# func wrap1(a: string): string = "{" & a & "}"
# func `'wrap2`(a: string): string = "{" & a & "}"
# result = quote do:
# let a1 = wrap1"-128"
# let a2 = -128'wrap2
# metawrap()
# doAssert a1 == "{-128}"
# doAssert a2 == "{-128}"
static: main()
main()

View File

@@ -1,19 +0,0 @@
discard """
output: "a\"\"long string\"\"\"\"\"abc\"def_'2'●𝌆𝌆A"
"""
# Test the new different string literals
const
tripleEmpty = """"long string"""""""" # "long string """""
rawQuote = r"a"""
raw = r"abc""def"
escaped = "\x5f'\50'\u25cf\u{1D306}\u{1d306}\u{41}"
stdout.write(rawQuote)
stdout.write(tripleEmpty)
stdout.write(raw)
stdout.writeLine(escaped)

View File

@@ -6,13 +6,7 @@ discard """
import std/[macros, strutils]
macro lispReprStr*(a: untyped): untyped = newLit(a.lispRepr)
macro assertAST*(expected: string, struct: untyped): untyped =
var ast = newLit(struct.treeRepr)
result = quote do:
if `ast` != `expected`:
doAssert false, "\nGot:\n" & `ast`.indent(2) & "\nExpected:\n" & `expected`.indent(2)
import mlexerutils
const one = 1
const minusOne = `-`(one)