breaking change: 'concept' is now a keyword and used instead of 'generic'

This commit is contained in:
Araq
2015-03-23 00:51:04 +01:00
parent 210fab10b2
commit f6ff01572e
15 changed files with 462 additions and 475 deletions

View File

@@ -10,51 +10,51 @@
# This scanner is handwritten for efficiency. I used an elegant buffering
# scheme which I have not seen anywhere else:
# We guarantee that a whole line is in the buffer. Thus only when scanning
# the \n or \r character we have to check whether we need to read in the next
# the \n or \r character we have to check whether we need to read in the next
# chunk. (\n or \r already need special handling for incrementing the line
# counter; choosing both \n and \r allows the scanner to properly read Unix,
# DOS or Macintosh text files, even when it is not the native format.)
import
import
hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream,
wordrecg
const
const
MaxLineLength* = 80 # lines longer than this lead to a warning
numChars*: set[char] = {'0'..'9', 'a'..'z', 'A'..'Z'}
SymChars*: set[char] = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'}
SymStartChars*: set[char] = {'a'..'z', 'A'..'Z', '\x80'..'\xFF'}
OpChars*: set[char] = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
OpChars*: set[char] = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
'|', '=', '%', '&', '$', '@', '~', ':', '\x80'..'\xFF'}
# don't forget to update the 'highlite' module if these charsets should change
type
TTokType* = enum
type
TTokType* = enum
tkInvalid, tkEof, # order is important here!
tkSymbol, # keywords:
tkAddr, tkAnd, tkAs, tkAsm, tkAtomic,
tkBind, tkBlock, tkBreak, tkCase, tkCast,
tkConst, tkContinue, tkConverter,
tkAddr, tkAnd, tkAs, tkAsm, tkAtomic,
tkBind, tkBlock, tkBreak, tkCase, tkCast,
tkConcept, tkConst, tkContinue, tkConverter,
tkDefer, tkDiscard, tkDistinct, tkDiv, tkDo,
tkElif, tkElse, tkEnd, tkEnum, tkExcept, tkExport,
tkFinally, tkFor, tkFrom, tkFunc,
tkGeneric, tkIf, tkImport, tkIn, tkInclude, tkInterface,
tkGeneric, tkIf, tkImport, tkIn, tkInclude, tkInterface,
tkIs, tkIsnot, tkIterator,
tkLet,
tkMacro, tkMethod, tkMixin, tkMod, tkNil, tkNot, tkNotin,
tkObject, tkOf, tkOr, tkOut,
tkMacro, tkMethod, tkMixin, tkMod, tkNil, tkNot, tkNotin,
tkObject, tkOf, tkOr, tkOut,
tkProc, tkPtr, tkRaise, tkRef, tkReturn, tkShl, tkShr, tkStatic,
tkTemplate,
tkTry, tkTuple, tkType, tkUsing,
tkTemplate,
tkTry, tkTuple, tkType, tkUsing,
tkVar, tkWhen, tkWhile, tkWith, tkWithout, tkXor,
tkYield, # end of keywords
tkIntLit, tkInt8Lit, tkInt16Lit, tkInt32Lit, tkInt64Lit,
tkUIntLit, tkUInt8Lit, tkUInt16Lit, tkUInt32Lit, tkUInt64Lit,
tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkFloat128Lit,
tkStrLit, tkRStrLit, tkTripleStrLit,
tkGStrLit, tkGTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe,
tkBracketRi, tkCurlyLe, tkCurlyRi,
tkGStrLit, tkGTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe,
tkBracketRi, tkCurlyLe, tkCurlyRi,
tkBracketDotLe, tkBracketDotRi, # [. and .]
tkCurlyDotLe, tkCurlyDotRi, # {. and .}
tkParDotLe, tkParDotRi, # (. and .)
@@ -62,27 +62,27 @@ type
tkColon, tkColonColon, tkEquals, tkDot, tkDotDot,
tkOpr, tkComment, tkAccent,
tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr,
TTokTypes* = set[TTokType]
const
const
tokKeywordLow* = succ(tkSymbol)
tokKeywordHigh* = pred(tkIntLit)
TokTypeToStr*: array[TTokType, string] = ["tkInvalid", "[EOF]",
TokTypeToStr*: array[TTokType, string] = ["tkInvalid", "[EOF]",
"tkSymbol",
"addr", "and", "as", "asm", "atomic",
"bind", "block", "break", "case", "cast",
"const", "continue", "converter",
"addr", "and", "as", "asm", "atomic",
"bind", "block", "break", "case", "cast",
"concept", "const", "continue", "converter",
"defer", "discard", "distinct", "div", "do",
"elif", "else", "end", "enum", "except", "export",
"finally", "for", "from", "func", "generic", "if",
"finally", "for", "from", "func", "generic", "if",
"import", "in", "include", "interface", "is", "isnot", "iterator",
"let",
"macro", "method", "mixin", "mod",
"nil", "not", "notin", "object", "of", "or",
"out", "proc", "ptr", "raise", "ref", "return",
"macro", "method", "mixin", "mod",
"nil", "not", "notin", "object", "of", "or",
"out", "proc", "ptr", "raise", "ref", "return",
"shl", "shr", "static",
"template",
"template",
"try", "tuple", "type", "using",
"var", "when", "while", "with", "without", "xor",
"yield",
@@ -90,7 +90,7 @@ const
"tkUIntLit", "tkUInt8Lit", "tkUInt16Lit", "tkUInt32Lit", "tkUInt64Lit",
"tkFloatLit", "tkFloat32Lit", "tkFloat64Lit", "tkFloat128Lit",
"tkStrLit", "tkRStrLit",
"tkTripleStrLit", "tkGStrLit", "tkGTripleStrLit", "tkCharLit", "(",
"tkTripleStrLit", "tkGStrLit", "tkGTripleStrLit", "tkCharLit", "(",
")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)",
",", ";",
":", "::", "=", ".", "..",
@@ -98,8 +98,8 @@ const
"tkSpaces", "tkInfixOpr",
"tkPrefixOpr", "tkPostfixOpr"]
type
TNumericalBase* = enum
type
TNumericalBase* = enum
base10, # base10 is listed as the first element,
# so that it is the correct default value
base2, base8, base16
@@ -148,45 +148,45 @@ proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) =
proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "")
# Reports whether `kind` is a keyword token, i.e. whether it lies in the
# inclusive range tokKeywordLow..tokKeywordHigh of the TTokType enum.
proc isKeyword(kind: TTokType): bool =
proc isKeyword(kind: TTokType): bool =
result = (kind >= tokKeywordLow) and (kind <= tokKeywordHigh)
# Checks whether `s` has the shape of an identifier: the first character must
# be in SymStartChars and the following characters must be in SymChars, with
# '_' accepted when it is followed by a character from SymChars.
# `result` is only set to true after the scan finishes; every early `return`
# leaves it at its default (false).
# NOTE(review): s[0] is read without checking s.len first -- confirm that
# callers never pass an empty string (or that reading s[0] is safe then).
# NOTE(review): indentation is not visible in this view; the two identical
# `notin SymChars` lines below may be an old/new diff pair or two distinct
# checks (one nested under the '_' branch) -- confirm against the real file.
proc isNimIdentifier*(s: string): bool =
if s[0] in SymStartChars:
# index 0 was validated above, so scanning starts at index 1
var i = 1
while i < s.len:
if s[i] == '_':
if s[i] == '_':
# step over the underscore; the character after it is validated next
inc(i)
if s[i] notin SymChars: return
if s[i] notin SymChars: return
inc(i)
result = true
# Returns a textual form of `tok`, chosen by its token type:
#   * tkIntLit..tkInt64Lit      -> the stringified tok.iNumber
#   * tkFloatLit..tkFloat64Lit  -> the stringified tok.fNumber
#   * tkInvalid, tkStrLit..tkCharLit, tkComment -> tok.literal
#   * tkParLe..tkColon, tkEof, tkAccent -> the entry from TokTypeToStr
#   * anything else -> tok.ident.s when an identifier is attached; otherwise
#     internalError is raised and the result is set to "".
# NOTE(review): going by the enum order, the unsigned literal kinds and
# tkFloat128Lit are not covered by the ranges above and therefore take the
# `else` path -- confirm this is intended.
proc tokToStr*(tok: TToken): string =
proc tokToStr*(tok: TToken): string =
case tok.tokType
of tkIntLit..tkInt64Lit: result = $tok.iNumber
of tkFloatLit..tkFloat64Lit: result = $tok.fNumber
of tkInvalid, tkStrLit..tkCharLit, tkComment: result = tok.literal
of tkParLe..tkColon, tkEof, tkAccent:
of tkParLe..tkColon, tkEof, tkAccent:
result = TokTypeToStr[tok.tokType]
else:
if tok.ident != nil:
result = tok.ident.s
else:
else:
# no identifier attached to a token kind that needs one
internalError("tokToStr")
result = ""
# Returns a description of `tok` for messages: keyword tokens are rendered
# as "keyword " followed by the identifier text, all other tokens are
# rendered via tokToStr.
proc prettyTok*(tok: TToken): string =
if isKeyword(tok.tokType): result = "keyword " & tok.ident.s
else: result = tokToStr(tok)
# Writes one line describing `tok` through msgWriteln, in the form
# "<line>:<col>", a tab, the TokTypeToStr entry for the token type, a space,
# and the token text as produced by tokToStr.
proc printTok*(tok: TToken) =
proc printTok*(tok: TToken) =
msgWriteln($tok.line & ":" & $tok.col & "\t" &
TokTypeToStr[tok.tokType] & " " & tokToStr(tok))
var dummyIdent: PIdent
proc initToken*(L: var TToken) =
proc initToken*(L: var TToken) =
L.tokType = tkInvalid
L.iNumber = 0
L.indent = 0
@@ -196,7 +196,7 @@ proc initToken*(L: var TToken) =
L.base = base10
L.ident = dummyIdent
proc fillToken(L: var TToken) =
proc fillToken(L: var TToken) =
L.tokType = tkInvalid
L.iNumber = 0
L.indent = 0
@@ -205,22 +205,22 @@ proc fillToken(L: var TToken) =
L.fNumber = 0.0
L.base = base10
L.ident = dummyIdent
# Prepares `lex` for scanning `inputstream`: opens the base lexer, records
# the file index, resets the indentation state (indentAhead = -1,
# currLineIndent = 0) and advances the line counter by the stream's
# lineOffset.
proc openLexer(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) =
proc openLexer(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) =
openBaseLexer(lex, inputstream)
# `fileidx` is the `fileIdx` parameter (Nim identifiers are style-insensitive)
lex.fileIdx = fileidx
lex.indentAhead = - 1
lex.currLineIndent = 0
inc(lex.lineNumber, inputstream.lineOffset)
inc(lex.lineNumber, inputstream.lineOffset)
# Adds the lexer's current line number to the global gLinesCompiled counter
# and then closes the underlying base lexer.
proc closeLexer(lex: var TLexer) =
proc closeLexer(lex: var TLexer) =
inc(gLinesCompiled, lex.lineNumber)
closeBaseLexer(lex)
# Returns the column number of the current buffer position (L.bufpos), as
# computed by getColNumber.
proc getColumn(L: TLexer): int =
proc getColumn(L: TLexer): int =
result = getColNumber(L, L.bufpos)
# Builds a TLineInfo for the current lexer position from the file index, the
# current line number and the column of L.bufpos.
proc getLineInfo(L: TLexer): TLineInfo =
proc getLineInfo(L: TLexer): TLineInfo =
result = newLineInfo(L.fileIdx, L.lineNumber, getColNumber(L, L.bufpos))
proc dispMessage(L: TLexer; info: TLineInfo; msg: TMsgKind; arg: string) =
@@ -236,24 +236,24 @@ proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") =
var info = newLineInfo(L.fileIdx, L.lineNumber, pos - L.lineStart)
L.dispMessage(info, msg, arg)
# Appends characters from the buffer to tok.literal, starting at L.bufpos,
# for as long as they are members of `chars`. An underscore is accepted (and
# appended) only when the character after it is again in `chars`; otherwise
# errInvalidToken is reported for "_" and scanning stops. On exit L.bufpos is
# set to the position where scanning stopped.
# NOTE(review): indentation is not visible in this view; the description
# assumes the '_' check sits inside the `while true` loop after the
# if/else -- confirm against the real file.
proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) =
proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) =
var pos = L.bufpos # use registers for pos, buf
var buf = L.buf
while true:
if buf[pos] in chars:
while true:
if buf[pos] in chars:
add(tok.literal, buf[pos])
inc(pos)
else:
break
if buf[pos] == '_':
if buf[pos+1] notin chars:
else:
break
if buf[pos] == '_':
if buf[pos+1] notin chars:
# an underscore must be followed by another accepted character
lexMessage(L, errInvalidToken, "_")
break
add(tok.literal, '_')
inc(pos)
L.bufpos = pos
# Returns true when the character at L.bufpos equals `first` and the
# character directly after it is a member of `second`. Does not advance the
# buffer position.
proc matchTwoChars(L: TLexer, first: char, second: set[char]): bool =
proc matchTwoChars(L: TLexer, first: char, second: set[char]): bool =
result = (L.buf[L.bufpos] == first) and (L.buf[L.bufpos + 1] in second)
proc isFloatLiteral(s: string): bool =
@@ -275,8 +275,8 @@ proc unsafeParseUInt(s: string, b: var BiggestInt, start = 0): int =
result = i - start
{.pop.} # overflowChecks
proc getNumber(L: var TLexer): TToken =
var
proc getNumber(L: var TLexer): TToken =
var
pos, endpos: int
xi: BiggestInt
# get the base:
@@ -290,15 +290,15 @@ proc getNumber(L: var TLexer): TToken =
else:
matchUnderscoreChars(L, result, {'0'..'9', 'b', 'B', 'o', 'c', 'C'})
eallowed = true
if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}):
if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}):
add(result.literal, '.')
inc(L.bufpos)
matchUnderscoreChars(L, result, {'0'..'9'})
eallowed = true
if eallowed and L.buf[L.bufpos] in {'e', 'E'}:
if eallowed and L.buf[L.bufpos] in {'e', 'E'}:
add(result.literal, 'e')
inc(L.bufpos)
if L.buf[L.bufpos] in {'+', '-'}:
if L.buf[L.bufpos] in {'+', '-'}:
add(result.literal, L.buf[L.bufpos])
inc(L.bufpos)
matchUnderscoreChars(L, result, {'0'..'9'})
@@ -307,7 +307,7 @@ proc getNumber(L: var TLexer): TToken =
if L.buf[endpos] == '\'': inc(endpos)
L.bufpos = pos # restore position
case L.buf[endpos]
of 'f', 'F':
of 'f', 'F':
inc(endpos)
if (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'):
result.tokType = tkFloat32Lit
@@ -320,36 +320,36 @@ proc getNumber(L: var TLexer): TToken =
(L.buf[endpos + 2] == '8'):
result.tokType = tkFloat128Lit
inc(endpos, 3)
else:
else:
lexMessage(L, errInvalidNumber, result.literal & "'f" & L.buf[endpos])
of 'i', 'I':
of 'i', 'I':
inc(endpos)
if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'):
if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'):
result.tokType = tkInt64Lit
inc(endpos, 2)
elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'):
elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'):
result.tokType = tkInt32Lit
inc(endpos, 2)
elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'):
elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'):
result.tokType = tkInt16Lit
inc(endpos, 2)
elif (L.buf[endpos] == '8'):
elif (L.buf[endpos] == '8'):
result.tokType = tkInt8Lit
inc(endpos)
else:
else:
lexMessage(L, errInvalidNumber, result.literal & "'i" & L.buf[endpos])
of 'u', 'U':
inc(endpos)
if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'):
if (L.buf[endpos] == '6') and (L.buf[endpos + 1] == '4'):
result.tokType = tkUInt64Lit
inc(endpos, 2)
elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'):
elif (L.buf[endpos] == '3') and (L.buf[endpos + 1] == '2'):
result.tokType = tkUInt32Lit
inc(endpos, 2)
elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'):
elif (L.buf[endpos] == '1') and (L.buf[endpos + 1] == '6'):
result.tokType = tkUInt16Lit
inc(endpos, 2)
elif (L.buf[endpos] == '8'):
elif (L.buf[endpos] == '8'):
result.tokType = tkUInt8Lit
inc(endpos)
else:
@@ -357,45 +357,45 @@ proc getNumber(L: var TLexer): TToken =
else: lexMessage(L, errInvalidNumber, result.literal & "'" & L.buf[endpos])
else:
L.bufpos = pos # restore position
try:
try:
if (L.buf[pos] == '0') and
(L.buf[pos + 1] in {'x', 'X', 'b', 'B', 'o', 'O', 'c', 'C'}):
(L.buf[pos + 1] in {'x', 'X', 'b', 'B', 'o', 'O', 'c', 'C'}):
inc(pos, 2)
xi = 0 # it may be a base prefix
case L.buf[pos - 1] # now look at the optional type suffix:
of 'b', 'B':
of 'b', 'B':
result.base = base2
while true:
while true:
case L.buf[pos]
of '2'..'9', '.':
of '2'..'9', '.':
lexMessage(L, errInvalidNumber, result.literal)
inc(pos)
of '_':
if L.buf[pos+1] notin {'0'..'1'}:
of '_':
if L.buf[pos+1] notin {'0'..'1'}:
lexMessage(L, errInvalidToken, "_")
break
inc(pos)
of '0', '1':
of '0', '1':
xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
inc(pos)
else: break
of 'o', 'c', 'C':
else: break
of 'o', 'c', 'C':
result.base = base8
while true:
while true:
case L.buf[pos]
of '8'..'9', '.':
of '8'..'9', '.':
lexMessage(L, errInvalidNumber, result.literal)
inc(pos)
of '_':
of '_':
if L.buf[pos+1] notin {'0'..'7'}:
lexMessage(L, errInvalidToken, "_")
break
inc(pos)
of '0'..'7':
of '0'..'7':
xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
inc(pos)
else: break
of 'O':
else: break
of 'O':
lexMessage(L, errInvalidNumber, result.literal)
of 'x', 'X':
result.base = base16
@@ -415,7 +415,7 @@ proc getNumber(L: var TLexer): TToken =
of 'A'..'F':
xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
inc(pos)
else: break
else: break
else: internalError(getLineInfo(L), "getNumber")
case result.tokType
of tkIntLit, tkInt64Lit: result.iNumber = xi
@@ -426,14 +426,14 @@ proc getNumber(L: var TLexer): TToken =
of tkUInt8Lit: result.iNumber = BiggestInt(int8(toU8(int(xi))))
of tkUInt16Lit: result.iNumber = BiggestInt(toU16(int(xi)))
of tkUInt32Lit: result.iNumber = BiggestInt(toU32(xi))
of tkFloat32Lit:
result.fNumber = (cast[PFloat32](addr(xi)))[]
of tkFloat32Lit:
result.fNumber = (cast[PFloat32](addr(xi)))[]
# note: this code is endian neutral!
# XXX: Test this on big endian machine!
of tkFloat64Lit: result.fNumber = (cast[PFloat64](addr(xi)))[]
of tkFloat64Lit: result.fNumber = (cast[PFloat64](addr(xi)))[]
else: internalError(getLineInfo(L), "getNumber")
elif isFloatLiteral(result.literal) or (result.tokType == tkFloat32Lit) or
(result.tokType == tkFloat64Lit):
(result.tokType == tkFloat64Lit):
result.fNumber = parseFloat(result.literal)
if result.tokType == tkIntLit: result.tokType = tkFloatLit
elif result.tokType == tkUint64Lit:
@@ -461,69 +461,69 @@ proc getNumber(L: var TLexer): TToken =
lexMessage(L, errNumberOutOfRange, result.literal)
L.bufpos = endpos
# Consumes at most one hexadecimal digit at L.bufpos. When the current
# character is 0-9, a-f or A-F, `xi` is shifted left by four bits, the
# digit's value is or-ed in, and the buffer position advances by one. Any
# other character leaves both `xi` and the position unchanged.
proc handleHexChar(L: var TLexer, xi: var int) =
proc handleHexChar(L: var TLexer, xi: var int) =
case L.buf[L.bufpos]
of '0'..'9':
of '0'..'9':
xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('0'))
inc(L.bufpos)
of 'a'..'f':
of 'a'..'f':
# letters map to the values 10..15
xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('a') + 10)
inc(L.bufpos)
of 'A'..'F':
of 'A'..'F':
xi = (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('A') + 10)
inc(L.bufpos)
else: discard
# Consumes the run of decimal digits starting at L.bufpos and accumulates
# their value into `xi` (xi = xi * 10 + digit for each digit). Stops at the
# first non-digit character.
# NOTE(review): the two `while` lines below look like an old/new diff pair
# of the same statement -- confirm against the real file.
proc handleDecChars(L: var TLexer, xi: var int) =
while L.buf[L.bufpos] in {'0'..'9'}:
proc handleDecChars(L: var TLexer, xi: var int) =
while L.buf[L.bufpos] in {'0'..'9'}:
xi = (xi * 10) + (ord(L.buf[L.bufpos]) - ord('0'))
inc(L.bufpos)
proc getEscapedChar(L: var TLexer, tok: var TToken) =
proc getEscapedChar(L: var TLexer, tok: var TToken) =
inc(L.bufpos) # skip '\'
case L.buf[L.bufpos]
of 'n', 'N':
of 'n', 'N':
if tok.tokType == tkCharLit: lexMessage(L, errNnotAllowedInCharacter)
add(tok.literal, tnl)
inc(L.bufpos)
of 'r', 'R', 'c', 'C':
of 'r', 'R', 'c', 'C':
add(tok.literal, CR)
inc(L.bufpos)
of 'l', 'L':
of 'l', 'L':
add(tok.literal, LF)
inc(L.bufpos)
of 'f', 'F':
of 'f', 'F':
add(tok.literal, FF)
inc(L.bufpos)
of 'e', 'E':
of 'e', 'E':
add(tok.literal, ESC)
inc(L.bufpos)
of 'a', 'A':
of 'a', 'A':
add(tok.literal, BEL)
inc(L.bufpos)
of 'b', 'B':
of 'b', 'B':
add(tok.literal, BACKSPACE)
inc(L.bufpos)
of 'v', 'V':
of 'v', 'V':
add(tok.literal, VT)
inc(L.bufpos)
of 't', 'T':
of 't', 'T':
add(tok.literal, '\t')
inc(L.bufpos)
of '\'', '\"':
of '\'', '\"':
add(tok.literal, L.buf[L.bufpos])
inc(L.bufpos)
of '\\':
of '\\':
add(tok.literal, '\\')
inc(L.bufpos)
of 'x', 'X':
of 'x', 'X':
inc(L.bufpos)
var xi = 0
handleHexChar(L, xi)
handleHexChar(L, xi)
add(tok.literal, chr(xi))
of '0'..'9':
if matchTwoChars(L, '0', {'0'..'9'}):
of '0'..'9':
if matchTwoChars(L, '0', {'0'..'9'}):
lexMessage(L, warnOctalEscape)
var xi = 0
handleDecChars(L, xi)
@@ -540,7 +540,7 @@ proc newString(s: cstring, len: int): string =
proc handleCRLF(L: var TLexer, pos: int): int =
template registerLine =
let col = L.getColNumber(pos)
if col > MaxLineLength:
lexMessagePos(L, hintLineTooLong, pos)
@@ -548,7 +548,7 @@ proc handleCRLF(L: var TLexer, pos: int): int =
let lineStart = cast[ByteAddress](L.buf) + L.lineStart
let line = newString(cast[cstring](lineStart), col)
addSourceLine(L.fileIdx, line)
case L.buf[pos]
of CR:
registerLine()
@@ -557,12 +557,12 @@ proc handleCRLF(L: var TLexer, pos: int): int =
registerLine()
result = nimlexbase.handleLF(L, pos)
else: result = pos
proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
var pos = L.bufpos + 1 # skip "
var buf = L.buf # put `buf` in a register
var line = L.lineNumber # save linenumber for better error message
if buf[pos] == '\"' and buf[pos+1] == '\"':
if buf[pos] == '\"' and buf[pos+1] == '\"':
tok.tokType = tkTripleStrLit # long string literal:
inc(pos, 2) # skip ""
# skip leading newline:
@@ -572,105 +572,105 @@ proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
if buf[newpos] in {CR, LF}: pos = newpos
pos = handleCRLF(L, pos)
buf = L.buf
while true:
while true:
case buf[pos]
of '\"':
of '\"':
if buf[pos+1] == '\"' and buf[pos+2] == '\"' and
buf[pos+3] != '\"':
buf[pos+3] != '\"':
L.bufpos = pos + 3 # skip the three """
break
break
add(tok.literal, '\"')
inc(pos)
of CR, LF:
of CR, LF:
pos = handleCRLF(L, pos)
buf = L.buf
add(tok.literal, tnl)
of nimlexbase.EndOfFile:
of nimlexbase.EndOfFile:
var line2 = L.lineNumber
L.lineNumber = line
lexMessagePos(L, errClosingTripleQuoteExpected, L.lineStart)
L.lineNumber = line2
break
else:
break
else:
add(tok.literal, buf[pos])
inc(pos)
else:
else:
# ordinary string literal
if rawMode: tok.tokType = tkRStrLit
else: tok.tokType = tkStrLit
while true:
while true:
var c = buf[pos]
if c == '\"':
if c == '\"':
if rawMode and buf[pos+1] == '\"':
inc(pos, 2)
add(tok.literal, '"')
else:
inc(pos) # skip '"'
break
elif c in {CR, LF, nimlexbase.EndOfFile}:
elif c in {CR, LF, nimlexbase.EndOfFile}:
lexMessage(L, errClosingQuoteExpected)
break
elif (c == '\\') and not rawMode:
break
elif (c == '\\') and not rawMode:
L.bufpos = pos
getEscapedChar(L, tok)
pos = L.bufpos
else:
else:
add(tok.literal, c)
inc(pos)
L.bufpos = pos
# Scans a character literal. The opening quote at L.bufpos is skipped, then:
#   * a character below ' ' or a quote reports errInvalidCharacterConstant
#   * a backslash hands over to getEscapedChar
#   * any other character becomes tok.literal and is skipped
# Afterwards a closing quote is expected; when it is missing
# errMissingFinalQuote is reported. The position is advanced past it either
# way.
# NOTE(review): indentation is not visible in this view; the description
# assumes the first `inc(L.bufpos)` after `tok.literal = $c` belongs to the
# `else` branch -- confirm against the real file.
proc getCharacter(L: var TLexer, tok: var TToken) =
proc getCharacter(L: var TLexer, tok: var TToken) =
inc(L.bufpos) # skip '
var c = L.buf[L.bufpos]
case c
of '\0'..pred(' '), '\'': lexMessage(L, errInvalidCharacterConstant)
of '\\': getEscapedChar(L, tok)
else:
else:
tok.literal = $c
inc(L.bufpos)
if L.buf[L.bufpos] != '\'': lexMessage(L, errMissingFinalQuote)
inc(L.bufpos) # skip '
# Scans an identifier or keyword starting at L.bufpos.
# While scanning, a hash `h` is built from the characters: lower-case
# letters, digits and bytes \x80..\xFF are mixed in as-is, upper-case letters
# are lower-cased first, and underscores are not mixed in at all. An
# underscore that is not followed by a character from SymChars reports
# errInvalidToken and ends the scan.
# The scanned text and the finished hash are passed to getIdent, L.bufpos is
# moved past the symbol, and the token type is derived from the identifier's
# id: ids inside the keyword range map to the matching keyword token, all
# others become tkSymbol.
# NOTE(review): the id comparison presumably relies on keyword identifiers
# having ids that equal their offset from tkSymbol -- confirm where the
# keywords are registered.
proc getSymbol(L: var TLexer, tok: var TToken) =
proc getSymbol(L: var TLexer, tok: var TToken) =
var h: THash = 0
var pos = L.bufpos
var buf = L.buf
while true:
while true:
var c = buf[pos]
case c
of 'a'..'z', '0'..'9', '\x80'..'\xFF':
of 'a'..'z', '0'..'9', '\x80'..'\xFF':
h = h !& ord(c)
of 'A'..'Z':
of 'A'..'Z':
c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
h = h !& ord(c)
of '_':
if buf[pos+1] notin SymChars:
if buf[pos+1] notin SymChars:
lexMessage(L, errInvalidToken, "_")
break
else: break
else: break
inc(pos)
h = !$h
tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
L.bufpos = pos
# ids outside the keyword range denote ordinary symbols
if (tok.ident.id < ord(tokKeywordLow) - ord(tkSymbol)) or
(tok.ident.id > ord(tokKeywordHigh) - ord(tkSymbol)):
(tok.ident.id > ord(tokKeywordHigh) - ord(tkSymbol)):
tok.tokType = tkSymbol
else:
else:
tok.tokType = TTokType(tok.ident.id + ord(tkSymbol))
# Finishes an operator token that spans L.bufpos up to (excluding) `pos`.
# The running hash `hash` is finalized, the operator text is passed to
# getIdent, and the token type is chosen from the identifier's id: ids
# outside oprLow..oprHigh yield the generic tkOpr, ids inside that range are
# mapped onto the token kinds starting at tkColon. Finally L.bufpos is set
# to `pos`.
proc endOperator(L: var TLexer, tok: var TToken, pos: int,
hash: THash) {.inline.} =
hash: THash) {.inline.} =
var h = !$hash
tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
if (tok.ident.id < oprLow) or (tok.ident.id > oprHigh): tok.tokType = tkOpr
else: tok.tokType = TTokType(tok.ident.id - oprLow + ord(tkColon))
L.bufpos = pos
proc getOperator(L: var TLexer, tok: var TToken) =
proc getOperator(L: var TLexer, tok: var TToken) =
var pos = L.bufpos
var buf = L.buf
var h: THash = 0
while true:
while true:
var c = buf[pos]
if c notin OpChars: break
h = h !& ord(c)
@@ -699,7 +699,7 @@ proc scanComment(L: var TLexer, tok: var TToken) =
return
else:
lexMessagePos(L, warnDeprecated, pos, "use '## [' instead; '##['")
tok.tokType = tkComment
# iNumber contains the number of '\n' in the token
tok.iNumber = 0
@@ -723,7 +723,7 @@ proc scanComment(L: var TLexer, tok: var TToken) =
pos = handleCRLF(L, pos)
buf = L.buf
var indent = 0
while buf[pos] == ' ':
while buf[pos] == ' ':
inc(pos)
inc(indent)
@@ -738,7 +738,7 @@ proc scanComment(L: var TLexer, tok: var TToken) =
when defined(nimfix): col = indent
inc tok.iNumber
else:
if buf[pos] > ' ':
if buf[pos] > ' ':
L.indentAhead = indent
break
L.bufpos = pos
@@ -801,10 +801,10 @@ proc rawGetTok(L: var TLexer, tok: var TToken) =
getSymbol(L, tok)
else:
case c
of '#':
of '#':
scanComment(L, tok)
of '*':
# '*:' is unfortunately a special case, because it is two tokens in
# '*:' is unfortunately a special case, because it is two tokens in
# 'var v*: int'.
if L.buf[L.bufpos+1] == ':' and L.buf[L.bufpos+2] notin OpChars:
var h = 0 !& ord('*')
@@ -814,29 +814,29 @@ proc rawGetTok(L: var TLexer, tok: var TToken) =
of ',':
tok.tokType = tkComma
inc(L.bufpos)
of 'l':
of 'l':
# if we parsed exactly one character and it's a small L (l), this
# is treated as a warning because it may be confused with the number 1
if L.buf[L.bufpos+1] notin (SymChars + {'_'}):
lexMessage(L, warnSmallLshouldNotBeUsed)
getSymbol(L, tok)
of 'r', 'R':
if L.buf[L.bufpos + 1] == '\"':
if L.buf[L.bufpos + 1] == '\"':
inc(L.bufpos)
getString(L, tok, true)
else:
else:
getSymbol(L, tok)
of '(':
of '(':
inc(L.bufpos)
if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.':
if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.':
tok.tokType = tkParDotLe
inc(L.bufpos)
else:
else:
tok.tokType = tkParLe
of ')':
of ')':
tok.tokType = tkParRi
inc(L.bufpos)
of '[':
of '[':
inc(L.bufpos)
if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.':
tok.tokType = tkBracketDotLe
@@ -847,34 +847,34 @@ proc rawGetTok(L: var TLexer, tok: var TToken) =
tok.tokType = tkBracketRi
inc(L.bufpos)
of '.':
if L.buf[L.bufpos+1] == ']':
if L.buf[L.bufpos+1] == ']':
tok.tokType = tkBracketDotRi
inc(L.bufpos, 2)
elif L.buf[L.bufpos+1] == '}':
elif L.buf[L.bufpos+1] == '}':
tok.tokType = tkCurlyDotRi
inc(L.bufpos, 2)
elif L.buf[L.bufpos+1] == ')':
elif L.buf[L.bufpos+1] == ')':
tok.tokType = tkParDotRi
inc(L.bufpos, 2)
else:
else:
getOperator(L, tok)
of '{':
of '{':
inc(L.bufpos)
if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] != '.':
tok.tokType = tkCurlyDotLe
inc(L.bufpos)
else:
else:
tok.tokType = tkCurlyLe
of '}':
of '}':
tok.tokType = tkCurlyRi
inc(L.bufpos)
of ';':
of ';':
tok.tokType = tkSemiColon
inc(L.bufpos)
of '`':
of '`':
tok.tokType = tkAccent
inc(L.bufpos)
of '\"':
of '\"':
# check for extended raw string literal:
var rawMode = L.bufpos > 0 and L.buf[L.bufpos-1] in SymChars
getString(L, tok, rawMode)
@@ -889,7 +889,7 @@ proc rawGetTok(L: var TLexer, tok: var TToken) =
of '0'..'9':
tok = getNumber(L)
else:
if c in OpChars:
if c in OpChars:
getOperator(L, tok)
elif c == nimlexbase.EndOfFile:
tok.tokType = tkEof

View File

@@ -1073,8 +1073,10 @@ proc primary(p: var TParser, mode: TPrimaryMode): PNode =
else:
result = newNodeP(nkObjectTy, p)
getTok(p)
of tkGeneric:
of tkGeneric, tkConcept:
if mode == pmTypeDef:
if p.tok.tokType == tkGeneric:
parMessage(p, warnDeprecated, "use 'concept' instead; 'generic'")
result = parseTypeClass(p)
else:
parMessage(p, errInvalidToken, p.tok)
@@ -1107,7 +1109,7 @@ proc parseTypeDesc(p: var TParser): PNode =
# Parses the right-hand side of a type definition by delegating to
# simpleExpr in pmTypeDef mode. The `#|` lines below record the grammar rule
# for this proc.
proc parseTypeDefAux(p: var TParser): PNode =
#| typeDefAux = simpleExpr
#| | 'generic' typeClass
#| | 'concept' typeClass
result = simpleExpr(p, pmTypeDef)
proc makeCall(n: PNode): PNode =

View File

@@ -725,7 +725,7 @@ proc gproc(g: var TSrcGen, n: PNode) =
proc gTypeClassTy(g: var TSrcGen, n: PNode) =
var c: TContext
initContext(c)
putWithSpace(g, tkGeneric, "generic")
putWithSpace(g, tkConcept, "concept")
gsons(g, n[0], c) # arglist
gsub(g, n[1]) # pragmas
gsub(g, n[2]) # of

View File

@@ -23,7 +23,7 @@ type
wInvalid,
wAddr, wAnd, wAs, wAsm, wAtomic,
wBind, wBlock, wBreak, wCase, wCast, wConst,
wBind, wBlock, wBreak, wCase, wCast, wConcept, wConst,
wContinue, wConverter, wDefer, wDiscard, wDistinct, wDiv, wDo,
wElif, wElse, wEnd, wEnum, wExcept, wExport,
wFinally, wFor, wFrom, wFunc, wGeneric, wIf, wImport, wIn,
@@ -103,7 +103,7 @@ const
"addr", "and", "as", "asm", "atomic",
"bind", "block", "break", "case", "cast",
"const", "continue", "converter",
"concept", "const", "continue", "converter",
"defer", "discard", "distinct", "div", "do",
"elif", "else", "end", "enum", "except", "export",
"finally", "for", "from", "func", "generic", "if",

View File

@@ -52,11 +52,11 @@ identOrLiteral = generalizedLit | symbol | literal
tupleConstr = '(' optInd (exprColonEqExpr comma?)* optPar ')'
arrayConstr = '[' optInd (exprColonEqExpr comma?)* optPar ']'
primarySuffix = '(' (exprColonEqExpr comma?)* ')' doBlocks?
| doBlocks
| '.' optInd symbol generalizedLit?
| '[' optInd indexExprList optPar ']'
| '{' optInd indexExprList optPar '}'
| &( '`'|IDENT|literal|'cast') expr # command syntax
| doBlocks
| '.' optInd symbol generalizedLit?
| '[' optInd indexExprList optPar ']'
| '{' optInd indexExprList optPar '}'
| &( '`'|IDENT|literal|'cast'|'addr'|'type') expr # command syntax
condExpr = expr colcom expr optInd
('elif' expr colcom expr optInd)*
'else' colcom expr
@@ -94,7 +94,7 @@ primary = typeKeyw typeDescK
/ 'bind' primary
typeDesc = simpleExpr
typeDefAux = simpleExpr
| 'generic' typeClass
| 'concept' typeClass
macroColon = ':' stmt? ( IND{=} 'of' exprList ':' stmt
| IND{=} 'elif' expr ':' stmt
| IND{=} 'except' exprList ':' stmt

View File

@@ -1,6 +1,6 @@
addr and as asm atomic
bind block break
case cast const continue converter
case cast concept const continue converter
defer discard distinct div do
elif else end enum except export
finally for from func

View File

@@ -60,7 +60,7 @@ Is operator
-----------
The ``is`` operator checks for type equivalence at compile time. It is
therefore very useful for type specialization within generic code:
therefore very useful for type specialization within generic code:
.. code-block:: nim
type
@@ -75,7 +75,7 @@ Type operator
-------------
The ``type`` (in many other languages called `typeof`:idx:) operator can
be used to get the type of an expression:
be used to get the type of an expression:
.. code-block:: nim
var x = 0
@@ -88,7 +88,7 @@ other interpretations:
.. code-block:: nim
import strutils
# strutils contains both a ``split`` proc and iterator, but since
# an iterator is the preferred interpretation, `y` has the type ``string``:
var y: type("a b c".split)
@@ -98,7 +98,7 @@ Type Classes
------------
A type class is a special pseudo-type that can be used to match against
types in the context of overload resolution or the ``is`` operator.
types in the context of overload resolution or the ``is`` operator.
Nim supports the following built-in type classes:
================== ===================================================
@@ -116,7 +116,7 @@ type class matches
``array`` any array type
``set`` any set type
``seq`` any seq type
``auto`` any type
``auto`` any type
================== ===================================================
Furthermore, every generic type automatically creates a type class of the same
@@ -134,7 +134,7 @@ more complex type classes:
echo key, " = ", value
Procedures utilizing type classes in such manner are considered to be
`implicitly generic`:idx:. They will be instantiated once for each unique
`implicitly generic`:idx:. They will be instantiated once for each unique
combination of param types used within the program.
Nim also allows for type classes and regular types to be specified
@@ -142,7 +142,7 @@ as `type constraints`:idx: of the generic type parameter:
.. code-block:: nim
proc onlyIntOrString[T: int|string](x, y: T) = discard
onlyIntOrString(450, 616) # valid
onlyIntOrString(5.0, 0.0) # type mismatch
onlyIntOrString("xy", 50) # invalid as 'T' cannot be both at the same time
@@ -152,7 +152,7 @@ exactly one concrete type. Here is an example taken directly from the system
module to illustrate this:
.. code-block:: nim
proc `==`*(x, y: tuple): bool =
proc `==`*(x, y: tuple): bool =
## requires `x` and `y` to be of the same tuple type
## generic ``==`` operator for tuples that is lifted from the components
## of `x` and `y`.
@@ -160,8 +160,8 @@ module to illustrate this:
for a, b in fields(x, y):
if a != b: result = false
Alternatively, the ``distinct`` type modifier can be applied to the type class
to allow each param matching the type class to bind to a different type.
Alternatively, the ``distinct`` type modifier can be applied to the type class
to allow each param matching the type class to bind to a different type.
If a proc param doesn't have a type specified, Nim will use the
``distinct auto`` type class (also known as ``any``):
@@ -178,7 +178,7 @@ the dot syntax:
type Matrix[T, Rows, Columns] = object
...
proc `[]`(m: Matrix, row, col: int): Matrix.T =
proc `[]`(m: Matrix, row, col: int): Matrix.T =
m.data[col * high(Matrix.Columns) + row]
Alternatively, the `type` operator can be used over the proc params for similar
@@ -189,7 +189,7 @@ type, this results in another more specific type class:
.. code-block:: nim
seq[ref object] # Any sequence storing references to any object type
type T1 = auto
proc foo(s: seq[T1], e: T1)
# seq[T1] is the same as just `seq`, but T1 will be allowed to bind
@@ -203,34 +203,34 @@ be inferred to have the equivalent of the `any` type class and thus they will
match anything without discrimination.
User defined type classes
-------------------------
Concepts
--------
**Note**: User defined type classes are still in development.
**Note**: Concepts are still in development.
The user-defined type classes are available in two flavours - declarative and
imperative. Both are used to specify an arbitrary set of requirements that the
matched type must satisfy.
Concepts, also known as "user-defined type classes", are available in two
flavours - declarative and imperative. Both are used to specify an arbitrary
set of requirements that the matched type must satisfy.
Declarative concepts are written in the following form:
.. code-block:: nim
type
Comparable = generic x, y
Comparable = concept x, y
(x < y) is bool
Container[T] = generic c
Container[T] = concept c
c.len is ordinal
items(c) is iterator
for value in c:
type(value) is T
The type class will be matched if:
The concept will be matched if:
a) all of the expressions within the body can be compiled for the tested type
b) all statically evaluable boolean expressions in the body are true
The identifiers following the `generic` keyword represent instances of the
The identifiers following the ``concept`` keyword represent instances of the
currently matched type. These instances can act both as variables of the type,
when used in contexts where a value is expected, and as the type itself when
used in contexts where a type is expected.
@@ -240,18 +240,18 @@ type signatures of the required operations, but since type inference and
default parameters are still applied in the provided block, it's also possible
to encode usage protocols that do not reveal implementation details.
As a special rule providing further convenience when writing type classes, any
As a special rule providing further convenience when writing concepts, any
type value appearing in a callable expression will be treated as a variable of
the designated type for overload resolution purposes, unless the type value was
passed in its explicit ``typedesc[T]`` form:
.. code-block:: nim
type
OutputStream = generic S
OutputStream = concept S
write(var S, string)
Much like generics, the user defined type classes will be instantiated exactly
once for each tested type and any static code included within them will also be
Much like generics, concepts are instantiated exactly
once for each tested type and any static code included within them is also
executed once.
@@ -269,7 +269,7 @@ The return type will be treated as an additional generic param and can be
explicitly specified at call sites as any other generic param.
Future versions of Nim may also support overloading based on the return type
of the overloads. In such settings, the expected result type at call sites may
of the overloads. In such settings, the expected result type at call sites may
also influence the inferred return type.
..
@@ -293,12 +293,12 @@ at definition and the context at instantiation are considered:
.. code-block:: nim
type
Index = distinct int
proc `==` (a, b: Index): bool {.borrow.}
var a = (0, 0.Index)
var b = (0, 0.Index)
echo a == b # works!
In the example the generic ``==`` for tuples (as defined in the system module)
@@ -307,7 +307,7 @@ the ``Index`` type is defined *after* the ``==`` for tuples; yet the example
compiles as the instantiation takes the currently defined symbols into account
too.
A symbol can be forced to be open by a `mixin`:idx: declaration:
A symbol can be forced to be open by a `mixin`:idx: declaration:
.. code-block:: nim
proc create*[T](): ref T =
@@ -321,16 +321,16 @@ A symbol can be forced to be open by a `mixin`:idx: declaration:
Bind statement
--------------
The ``bind`` statement is the counterpart to the ``mixin`` statement. It
The ``bind`` statement is the counterpart to the ``mixin`` statement. It
can be used to explicitly declare identifiers that should be bound early (i.e.
the identifiers should be looked up in the scope of the template/generic
definition):
.. code-block:: nim
# Module A
var
var
lastId = 0
template genId*: expr =
bind lastId
inc(lastId)
@@ -339,7 +339,7 @@ definition):
.. code-block:: nim
# Module B
import A
echo genId()
But a ``bind`` is rarely useful because symbol binding from the definition

View File

@@ -6,7 +6,7 @@ static[T]
**Note**: static[T] is still in development.
As their name suggests, static params must be known at compile-time:
As their name suggests, static parameters must be known at compile-time:
.. code-block:: nim
@@ -23,23 +23,7 @@ As their name suggests, static params must be known at compile-time:
For the purposes of code generation, all static params are treated as
generic params - the proc will be compiled separately for each unique
supplied value (or combination of values).
Furthermore, the system module defines a `semistatic[T]` type that can be
used to declare procs accepting both static and run-time values, which can
optimize their body according to the supplied param using the `isStatic(p)`
predicate:
.. code-block:: nim
# The following proc will be compiled once for each unique static
# value and also once for the case handling all run-time values:
proc re(pattern: semistatic[string]): RegEx =
when isStatic(pattern):
result = precompiledRegex(pattern)
else:
result = compile(pattern)
supplied value (or combination of values).
Static params can also appear in the signatures of generic types:
@@ -61,7 +45,7 @@ typedesc
--------
`typedesc` is a special type allowing one to treat types as compile-time values
(i.e. if types are compile-time values and all values have a type, then
(i.e. if types are compile-time values and all values have a type, then
typedesc must be their type).
When used as a regular proc param, typedesc acts as a type class. The proc
@@ -100,7 +84,7 @@ When used with macros and .compileTime. procs on the other hand, the compiler
does not need to instantiate the code multiple times, because types then can be
manipulated using the unified internal symbol representation. In such a context
typedesc acts as any other type. One can create variables, store typedesc
values inside containers and so on. For example, here is how one can create
values inside containers and so on. For example, here is how one can create
a type-safe wrapper for the unsafe `printf` function from C:
.. code-block:: nim
@@ -114,7 +98,7 @@ a type-safe wrapper for the unsafe `printf` function from C:
of 's': string
of 'p': pointer
else: EOutOfRange
var actualType = args[i].getType
inc i
@@ -123,7 +107,7 @@ a type-safe wrapper for the unsafe `printf` function from C:
elif expectedType != actualType:
error "type mismatch for argument ", i, ". expected type: ",
expectedType.name, ", actual type: ", actualType.name
# keep the original callsite, but use cprintf instead
result = callsite()
result[0] = newIdentNode(!"cprintf")

View File

@@ -14,14 +14,14 @@
import
strutils
type
TTokenClass* = enum
gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber,
gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit,
type
TTokenClass* = enum
gtEof, gtNone, gtWhitespace, gtDecNumber, gtBinNumber, gtHexNumber,
gtOctNumber, gtFloatNumber, gtIdentifier, gtKeyword, gtStringLit,
gtLongStringLit, gtCharLit, gtEscapeSequence, # escape sequence like \xff
gtOperator, gtPunctuation, gtComment, gtLongComment, gtRegularExpression,
gtTagStart, gtTagEnd, gtKey, gtValue, gtRawData, gtAssembler,
gtPreprocessor, gtDirective, gtCommand, gtRule, gtHyperlink, gtLabel,
gtOperator, gtPunctuation, gtComment, gtLongComment, gtRegularExpression,
gtTagStart, gtTagEnd, gtKey, gtValue, gtRawData, gtAssembler,
gtPreprocessor, gtDirective, gtCommand, gtRule, gtHyperlink, gtLabel,
gtReference, gtOther
TGeneralTokenizer* = object of RootObj
kind*: TTokenClass
@@ -30,27 +30,27 @@ type
pos: int
state: TTokenClass
TSourceLanguage* = enum
TSourceLanguage* = enum
langNone, langNim, langNimrod, langCpp, langCsharp, langC, langJava
const
const
sourceLanguageToStr*: array[TSourceLanguage, string] = ["none",
"Nim", "Nimrod", "C++", "C#", "C", "Java"]
tokenClassToStr*: array[TTokenClass, string] = ["Eof", "None", "Whitespace",
"DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber",
"Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit",
"EscapeSequence", "Operator", "Punctuation", "Comment", "LongComment",
"RegularExpression", "TagStart", "TagEnd", "Key", "Value", "RawData",
"Assembler", "Preprocessor", "Directive", "Command", "Rule", "Hyperlink",
tokenClassToStr*: array[TTokenClass, string] = ["Eof", "None", "Whitespace",
"DecNumber", "BinNumber", "HexNumber", "OctNumber", "FloatNumber",
"Identifier", "Keyword", "StringLit", "LongStringLit", "CharLit",
"EscapeSequence", "Operator", "Punctuation", "Comment", "LongComment",
"RegularExpression", "TagStart", "TagEnd", "Key", "Value", "RawData",
"Assembler", "Preprocessor", "Directive", "Command", "Rule", "Hyperlink",
"Label", "Reference", "Other"]
# The following list comes from doc/keywords.txt. Make sure that file stays
# synchronized with this array by running the module itself as a test case.
nimKeywords = ["addr", "and", "as", "asm", "atomic", "bind", "block",
"break", "case", "cast", "const", "continue", "converter",
"break", "case", "cast", "concept", "const", "continue", "converter",
"defer", "discard", "distinct", "div", "do",
"elif", "else", "end", "enum", "except", "export",
"finally", "for", "from", "func",
"finally", "for", "from", "func",
"generic", "if", "import", "in", "include",
"interface", "is", "isnot", "iterator", "let", "macro", "method",
"mixin", "mod", "nil", "not", "notin", "object", "of", "or", "out", "proc",
@@ -58,12 +58,12 @@ const
"template", "try", "tuple", "type", "using", "var", "when", "while", "with",
"without", "xor", "yield"]
proc getSourceLanguage*(name: string): TSourceLanguage =
for i in countup(succ(low(TSourceLanguage)), high(TSourceLanguage)):
if cmpIgnoreStyle(name, sourceLanguageToStr[i]) == 0:
proc getSourceLanguage*(name: string): TSourceLanguage =
for i in countup(succ(low(TSourceLanguage)), high(TSourceLanguage)):
if cmpIgnoreStyle(name, sourceLanguageToStr[i]) == 0:
return i
result = langNone
proc initGeneralTokenizer*(g: var TGeneralTokenizer, buf: cstring) =
g.buf = buf
g.kind = low(TTokenClass)
@@ -74,52 +74,52 @@ proc initGeneralTokenizer*(g: var TGeneralTokenizer, buf: cstring) =
while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
g.pos = pos
proc initGeneralTokenizer*(g: var TGeneralTokenizer, buf: string) =
proc initGeneralTokenizer*(g: var TGeneralTokenizer, buf: string) =
initGeneralTokenizer(g, cstring(buf))
proc deinitGeneralTokenizer*(g: var TGeneralTokenizer) =
proc deinitGeneralTokenizer*(g: var TGeneralTokenizer) =
discard
proc nimGetKeyword(id: string): TTokenClass =
proc nimGetKeyword(id: string): TTokenClass =
for k in nimKeywords:
if cmpIgnoreStyle(id, k) == 0: return gtKeyword
result = gtIdentifier
when false:
var i = getIdent(id)
if (i.id >= ord(tokKeywordLow) - ord(tkSymbol)) and
(i.id <= ord(tokKeywordHigh) - ord(tkSymbol)):
(i.id <= ord(tokKeywordHigh) - ord(tkSymbol)):
result = gtKeyword
else:
else:
result = gtIdentifier
proc nimNumberPostfix(g: var TGeneralTokenizer, position: int): int =
proc nimNumberPostfix(g: var TGeneralTokenizer, position: int): int =
var pos = position
if g.buf[pos] == '\'':
if g.buf[pos] == '\'':
inc(pos)
case g.buf[pos]
of 'f', 'F':
of 'f', 'F':
g.kind = gtFloatNumber
inc(pos)
if g.buf[pos] in {'0'..'9'}: inc(pos)
if g.buf[pos] in {'0'..'9'}: inc(pos)
of 'i', 'I':
of 'i', 'I':
inc(pos)
if g.buf[pos] in {'0'..'9'}: inc(pos)
if g.buf[pos] in {'0'..'9'}: inc(pos)
else:
else:
discard
result = pos
proc nimNumber(g: var TGeneralTokenizer, position: int): int =
proc nimNumber(g: var TGeneralTokenizer, position: int): int =
const decChars = {'0'..'9', '_'}
var pos = position
g.kind = gtDecNumber
while g.buf[pos] in decChars: inc(pos)
if g.buf[pos] == '.':
if g.buf[pos] == '.':
g.kind = gtFloatNumber
inc(pos)
while g.buf[pos] in decChars: inc(pos)
if g.buf[pos] in {'e', 'E'}:
if g.buf[pos] in {'e', 'E'}:
g.kind = gtFloatNumber
inc(pos)
if g.buf[pos] in {'+', '-'}: inc(pos)
@@ -127,150 +127,150 @@ proc nimNumber(g: var TGeneralTokenizer, position: int): int =
result = nimNumberPostfix(g, pos)
const
OpChars = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
OpChars = {'+', '-', '*', '/', '\\', '<', '>', '!', '?', '^', '.',
'|', '=', '%', '&', '$', '@', '~', ':', '\x80'..'\xFF'}
proc nimNextToken(g: var TGeneralTokenizer) =
const
proc nimNextToken(g: var TGeneralTokenizer) =
const
hexChars = {'0'..'9', 'A'..'F', 'a'..'f', '_'}
octChars = {'0'..'7', '_'}
binChars = {'0'..'1', '_'}
SymChars = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'}
var pos = g.pos
g.start = g.pos
if g.state == gtStringLit:
if g.state == gtStringLit:
g.kind = gtStringLit
while true:
while true:
case g.buf[pos]
of '\\':
of '\\':
g.kind = gtEscapeSequence
inc(pos)
case g.buf[pos]
of 'x', 'X':
of 'x', 'X':
inc(pos)
if g.buf[pos] in hexChars: inc(pos)
if g.buf[pos] in hexChars: inc(pos)
of '0'..'9':
of '0'..'9':
while g.buf[pos] in {'0'..'9'}: inc(pos)
of '\0':
of '\0':
g.state = gtNone
else: inc(pos)
break
of '\0', '\x0D', '\x0A':
break
of '\0', '\x0D', '\x0A':
g.state = gtNone
break
of '\"':
break
of '\"':
inc(pos)
g.state = gtNone
break
break
else: inc(pos)
else:
else:
case g.buf[pos]
of ' ', '\x09'..'\x0D':
of ' ', '\x09'..'\x0D':
g.kind = gtWhitespace
while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
of '#':
of '#':
g.kind = gtComment
while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): inc(pos)
of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF':
of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF':
var id = ""
while g.buf[pos] in SymChars + {'_'}:
while g.buf[pos] in SymChars + {'_'}:
add(id, g.buf[pos])
inc(pos)
if (g.buf[pos] == '\"'):
if (g.buf[pos + 1] == '\"') and (g.buf[pos + 2] == '\"'):
if (g.buf[pos] == '\"'):
if (g.buf[pos + 1] == '\"') and (g.buf[pos + 2] == '\"'):
inc(pos, 3)
g.kind = gtLongStringLit
while true:
while true:
case g.buf[pos]
of '\0':
break
of '\"':
of '\0':
break
of '\"':
inc(pos)
if g.buf[pos] == '\"' and g.buf[pos+1] == '\"' and
g.buf[pos+2] != '\"':
if g.buf[pos] == '\"' and g.buf[pos+1] == '\"' and
g.buf[pos+2] != '\"':
inc(pos, 2)
break
break
else: inc(pos)
else:
else:
g.kind = gtRawData
inc(pos)
while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}):
while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}):
if g.buf[pos] == '"' and g.buf[pos+1] != '"': break
inc(pos)
if g.buf[pos] == '\"': inc(pos)
else:
else:
g.kind = nimGetKeyword(id)
of '0':
of '0':
inc(pos)
case g.buf[pos]
of 'b', 'B':
of 'b', 'B':
inc(pos)
while g.buf[pos] in binChars: inc(pos)
pos = nimNumberPostfix(g, pos)
of 'x', 'X':
of 'x', 'X':
inc(pos)
while g.buf[pos] in hexChars: inc(pos)
pos = nimNumberPostfix(g, pos)
of 'o', 'O':
of 'o', 'O':
inc(pos)
while g.buf[pos] in octChars: inc(pos)
pos = nimNumberPostfix(g, pos)
else: pos = nimNumber(g, pos)
of '1'..'9':
of '1'..'9':
pos = nimNumber(g, pos)
of '\'':
of '\'':
inc(pos)
g.kind = gtCharLit
while true:
while true:
case g.buf[pos]
of '\0', '\x0D', '\x0A':
break
of '\'':
of '\0', '\x0D', '\x0A':
break
of '\'':
inc(pos)
break
of '\\':
break
of '\\':
inc(pos, 2)
else: inc(pos)
of '\"':
of '\"':
inc(pos)
if (g.buf[pos] == '\"') and (g.buf[pos + 1] == '\"'):
if (g.buf[pos] == '\"') and (g.buf[pos + 1] == '\"'):
inc(pos, 2)
g.kind = gtLongStringLit
while true:
while true:
case g.buf[pos]
of '\0':
break
of '\"':
of '\0':
break
of '\"':
inc(pos)
if g.buf[pos] == '\"' and g.buf[pos+1] == '\"' and
g.buf[pos+2] != '\"':
if g.buf[pos] == '\"' and g.buf[pos+1] == '\"' and
g.buf[pos+2] != '\"':
inc(pos, 2)
break
break
else: inc(pos)
else:
else:
g.kind = gtStringLit
while true:
while true:
case g.buf[pos]
of '\0', '\x0D', '\x0A':
break
of '\"':
of '\0', '\x0D', '\x0A':
break
of '\"':
inc(pos)
break
of '\\':
break
of '\\':
g.state = g.kind
break
break
else: inc(pos)
of '(', ')', '[', ']', '{', '}', '`', ':', ',', ';':
of '(', ')', '[', ']', '{', '}', '`', ':', ',', ';':
inc(pos)
g.kind = gtPunctuation
of '\0':
of '\0':
g.kind = gtEof
else:
if g.buf[pos] in OpChars:
else:
if g.buf[pos] in OpChars:
g.kind = gtOperator
while g.buf[pos] in OpChars: inc(pos)
else:
else:
inc(pos)
g.kind = gtNone
g.length = pos - g.pos
@@ -278,211 +278,211 @@ proc nimNextToken(g: var TGeneralTokenizer) =
assert false, "nimNextToken: produced an empty token"
g.pos = pos
proc generalNumber(g: var TGeneralTokenizer, position: int): int =
proc generalNumber(g: var TGeneralTokenizer, position: int): int =
const decChars = {'0'..'9'}
var pos = position
g.kind = gtDecNumber
while g.buf[pos] in decChars: inc(pos)
if g.buf[pos] == '.':
if g.buf[pos] == '.':
g.kind = gtFloatNumber
inc(pos)
while g.buf[pos] in decChars: inc(pos)
if g.buf[pos] in {'e', 'E'}:
if g.buf[pos] in {'e', 'E'}:
g.kind = gtFloatNumber
inc(pos)
if g.buf[pos] in {'+', '-'}: inc(pos)
while g.buf[pos] in decChars: inc(pos)
result = pos
proc generalStrLit(g: var TGeneralTokenizer, position: int): int =
const
proc generalStrLit(g: var TGeneralTokenizer, position: int): int =
const
decChars = {'0'..'9'}
hexChars = {'0'..'9', 'A'..'F', 'a'..'f'}
var pos = position
g.kind = gtStringLit
var c = g.buf[pos]
inc(pos) # skip " or '
while true:
while true:
case g.buf[pos]
of '\0':
break
of '\\':
of '\0':
break
of '\\':
inc(pos)
case g.buf[pos]
of '\0':
break
of '0'..'9':
of '\0':
break
of '0'..'9':
while g.buf[pos] in decChars: inc(pos)
of 'x', 'X':
of 'x', 'X':
inc(pos)
if g.buf[pos] in hexChars: inc(pos)
if g.buf[pos] in hexChars: inc(pos)
else: inc(pos, 2)
else:
if g.buf[pos] == c:
else:
if g.buf[pos] == c:
inc(pos)
break
else:
break
else:
inc(pos)
result = pos
proc isKeyword(x: openArray[string], y: string): int =
proc isKeyword(x: openArray[string], y: string): int =
var a = 0
var b = len(x) - 1
while a <= b:
while a <= b:
var mid = (a + b) div 2
var c = cmp(x[mid], y)
if c < 0:
if c < 0:
a = mid + 1
elif c > 0:
elif c > 0:
b = mid - 1
else:
else:
return mid
result = - 1
proc isKeywordIgnoreCase(x: openArray[string], y: string): int =
proc isKeywordIgnoreCase(x: openArray[string], y: string): int =
var a = 0
var b = len(x) - 1
while a <= b:
while a <= b:
var mid = (a + b) div 2
var c = cmpIgnoreCase(x[mid], y)
if c < 0:
if c < 0:
a = mid + 1
elif c > 0:
elif c > 0:
b = mid - 1
else:
else:
return mid
result = - 1
type
TTokenizerFlag = enum
type
TTokenizerFlag = enum
hasPreprocessor, hasNestedComments
TTokenizerFlags = set[TTokenizerFlag]
proc clikeNextToken(g: var TGeneralTokenizer, keywords: openArray[string],
flags: TTokenizerFlags) =
const
proc clikeNextToken(g: var TGeneralTokenizer, keywords: openArray[string],
flags: TTokenizerFlags) =
const
hexChars = {'0'..'9', 'A'..'F', 'a'..'f'}
octChars = {'0'..'7'}
binChars = {'0'..'1'}
symChars = {'A'..'Z', 'a'..'z', '0'..'9', '_', '\x80'..'\xFF'}
var pos = g.pos
g.start = g.pos
if g.state == gtStringLit:
if g.state == gtStringLit:
g.kind = gtStringLit
while true:
while true:
case g.buf[pos]
of '\\':
of '\\':
g.kind = gtEscapeSequence
inc(pos)
case g.buf[pos]
of 'x', 'X':
of 'x', 'X':
inc(pos)
if g.buf[pos] in hexChars: inc(pos)
if g.buf[pos] in hexChars: inc(pos)
of '0'..'9':
of '0'..'9':
while g.buf[pos] in {'0'..'9'}: inc(pos)
of '\0':
of '\0':
g.state = gtNone
else: inc(pos)
break
of '\0', '\x0D', '\x0A':
break
of '\0', '\x0D', '\x0A':
g.state = gtNone
break
of '\"':
break
of '\"':
inc(pos)
g.state = gtNone
break
break
else: inc(pos)
else:
else:
case g.buf[pos]
of ' ', '\x09'..'\x0D':
of ' ', '\x09'..'\x0D':
g.kind = gtWhitespace
while g.buf[pos] in {' ', '\x09'..'\x0D'}: inc(pos)
of '/':
of '/':
inc(pos)
if g.buf[pos] == '/':
if g.buf[pos] == '/':
g.kind = gtComment
while not (g.buf[pos] in {'\0', '\x0A', '\x0D'}): inc(pos)
elif g.buf[pos] == '*':
elif g.buf[pos] == '*':
g.kind = gtLongComment
var nested = 0
inc(pos)
while true:
while true:
case g.buf[pos]
of '*':
of '*':
inc(pos)
if g.buf[pos] == '/':
if g.buf[pos] == '/':
inc(pos)
if nested == 0: break
of '/':
if nested == 0: break
of '/':
inc(pos)
if g.buf[pos] == '*':
if g.buf[pos] == '*':
inc(pos)
if hasNestedComments in flags: inc(nested)
of '\0':
break
of '\0':
break
else: inc(pos)
of '#':
of '#':
inc(pos)
if hasPreprocessor in flags:
if hasPreprocessor in flags:
g.kind = gtPreprocessor
while g.buf[pos] in {' ', '\t'}: inc(pos)
while g.buf[pos] in symChars: inc(pos)
else:
else:
g.kind = gtOperator
of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF':
of 'a'..'z', 'A'..'Z', '_', '\x80'..'\xFF':
var id = ""
while g.buf[pos] in symChars:
while g.buf[pos] in symChars:
add(id, g.buf[pos])
inc(pos)
if isKeyword(keywords, id) >= 0: g.kind = gtKeyword
else: g.kind = gtIdentifier
of '0':
of '0':
inc(pos)
case g.buf[pos]
of 'b', 'B':
of 'b', 'B':
inc(pos)
while g.buf[pos] in binChars: inc(pos)
if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos)
of 'x', 'X':
of 'x', 'X':
inc(pos)
while g.buf[pos] in hexChars: inc(pos)
if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos)
of '0'..'7':
of '0'..'7':
inc(pos)
while g.buf[pos] in octChars: inc(pos)
if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos)
else:
else:
pos = generalNumber(g, pos)
if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos)
of '1'..'9':
of '1'..'9':
pos = generalNumber(g, pos)
if g.buf[pos] in {'A'..'Z', 'a'..'z'}: inc(pos)
of '\'':
of '\'':
pos = generalStrLit(g, pos)
g.kind = gtCharLit
of '\"':
of '\"':
inc(pos)
g.kind = gtStringLit
while true:
while true:
case g.buf[pos]
of '\0':
break
of '\"':
of '\0':
break
of '\"':
inc(pos)
break
of '\\':
break
of '\\':
g.state = g.kind
break
break
else: inc(pos)
of '(', ')', '[', ']', '{', '}', ':', ',', ';', '.':
of '(', ')', '[', ']', '{', '}', ':', ',', ';', '.':
inc(pos)
g.kind = gtPunctuation
of '\0':
of '\0':
g.kind = gtEof
else:
if g.buf[pos] in OpChars:
else:
if g.buf[pos] in OpChars:
g.kind = gtOperator
while g.buf[pos] in OpChars: inc(pos)
else:
@@ -493,55 +493,55 @@ proc clikeNextToken(g: var TGeneralTokenizer, keywords: openArray[string],
assert false, "clikeNextToken: produced an empty token"
g.pos = pos
proc cNextToken(g: var TGeneralTokenizer) =
const
keywords: array[0..36, string] = ["_Bool", "_Complex", "_Imaginary", "auto",
"break", "case", "char", "const", "continue", "default", "do", "double",
"else", "enum", "extern", "float", "for", "goto", "if", "inline", "int",
"long", "register", "restrict", "return", "short", "signed", "sizeof",
"static", "struct", "switch", "typedef", "union", "unsigned", "void",
proc cNextToken(g: var TGeneralTokenizer) =
const
keywords: array[0..36, string] = ["_Bool", "_Complex", "_Imaginary", "auto",
"break", "case", "char", "const", "continue", "default", "do", "double",
"else", "enum", "extern", "float", "for", "goto", "if", "inline", "int",
"long", "register", "restrict", "return", "short", "signed", "sizeof",
"static", "struct", "switch", "typedef", "union", "unsigned", "void",
"volatile", "while"]
clikeNextToken(g, keywords, {hasPreprocessor})
proc cppNextToken(g: var TGeneralTokenizer) =
const
keywords: array[0..47, string] = ["asm", "auto", "break", "case", "catch",
"char", "class", "const", "continue", "default", "delete", "do", "double",
"else", "enum", "extern", "float", "for", "friend", "goto", "if",
"inline", "int", "long", "new", "operator", "private", "protected",
"public", "register", "return", "short", "signed", "sizeof", "static",
"struct", "switch", "template", "this", "throw", "try", "typedef",
proc cppNextToken(g: var TGeneralTokenizer) =
const
keywords: array[0..47, string] = ["asm", "auto", "break", "case", "catch",
"char", "class", "const", "continue", "default", "delete", "do", "double",
"else", "enum", "extern", "float", "for", "friend", "goto", "if",
"inline", "int", "long", "new", "operator", "private", "protected",
"public", "register", "return", "short", "signed", "sizeof", "static",
"struct", "switch", "template", "this", "throw", "try", "typedef",
"union", "unsigned", "virtual", "void", "volatile", "while"]
clikeNextToken(g, keywords, {hasPreprocessor})
proc csharpNextToken(g: var TGeneralTokenizer) =
const
keywords: array[0..76, string] = ["abstract", "as", "base", "bool", "break",
"byte", "case", "catch", "char", "checked", "class", "const", "continue",
"decimal", "default", "delegate", "do", "double", "else", "enum", "event",
"explicit", "extern", "false", "finally", "fixed", "float", "for",
"foreach", "goto", "if", "implicit", "in", "int", "interface", "internal",
"is", "lock", "long", "namespace", "new", "null", "object", "operator",
"out", "override", "params", "private", "protected", "public", "readonly",
"ref", "return", "sbyte", "sealed", "short", "sizeof", "stackalloc",
"static", "string", "struct", "switch", "this", "throw", "true", "try",
"typeof", "uint", "ulong", "unchecked", "unsafe", "ushort", "using",
proc csharpNextToken(g: var TGeneralTokenizer) =
const
keywords: array[0..76, string] = ["abstract", "as", "base", "bool", "break",
"byte", "case", "catch", "char", "checked", "class", "const", "continue",
"decimal", "default", "delegate", "do", "double", "else", "enum", "event",
"explicit", "extern", "false", "finally", "fixed", "float", "for",
"foreach", "goto", "if", "implicit", "in", "int", "interface", "internal",
"is", "lock", "long", "namespace", "new", "null", "object", "operator",
"out", "override", "params", "private", "protected", "public", "readonly",
"ref", "return", "sbyte", "sealed", "short", "sizeof", "stackalloc",
"static", "string", "struct", "switch", "this", "throw", "true", "try",
"typeof", "uint", "ulong", "unchecked", "unsafe", "ushort", "using",
"virtual", "void", "volatile", "while"]
clikeNextToken(g, keywords, {hasPreprocessor})
proc javaNextToken(g: var TGeneralTokenizer) =
const
keywords: array[0..52, string] = ["abstract", "assert", "boolean", "break",
"byte", "case", "catch", "char", "class", "const", "continue", "default",
"do", "double", "else", "enum", "extends", "false", "final", "finally",
"float", "for", "goto", "if", "implements", "import", "instanceof", "int",
"interface", "long", "native", "new", "null", "package", "private",
"protected", "public", "return", "short", "static", "strictfp", "super",
"switch", "synchronized", "this", "throw", "throws", "transient", "true",
proc javaNextToken(g: var TGeneralTokenizer) =
const
keywords: array[0..52, string] = ["abstract", "assert", "boolean", "break",
"byte", "case", "catch", "char", "class", "const", "continue", "default",
"do", "double", "else", "enum", "extends", "false", "final", "finally",
"float", "for", "goto", "if", "implements", "import", "instanceof", "int",
"interface", "long", "native", "new", "null", "package", "private",
"protected", "public", "return", "short", "static", "strictfp", "super",
"switch", "synchronized", "this", "throw", "throws", "transient", "true",
"try", "void", "volatile", "while"]
clikeNextToken(g, keywords, {})
proc getNextToken*(g: var TGeneralTokenizer, lang: TSourceLanguage) =
proc getNextToken*(g: var TGeneralTokenizer, lang: TSourceLanguage) =
case lang
of langNone: assert false
of langNim, langNimrod: nimNextToken(g)
@@ -549,15 +549,17 @@ proc getNextToken*(g: var TGeneralTokenizer, lang: TSourceLanguage) =
of langCsharp: csharpNextToken(g)
of langC: cNextToken(g)
of langJava: javaNextToken(g)
when isMainModule:
var keywords: seq[string]
# Try to work when run from either the subdir or the root.
for filename in ["doc/keywords.txt", "../../../doc/keywords.txt"]:
except: echo filename, " not found"
let input = string(readFile(filename))
keywords = input.split()
break
try:
let input = string(readFile(filename))
keywords = input.split()
break
except:
echo filename, " not found"
doAssert(not keywords.isNil, "Couldn't read any keywords.txt file!")
doAssert keywords.len == nimKeywords.len, "No matching lengths"
for i in 0..keywords.len-1:

View File

@@ -22,7 +22,7 @@ template reject(e: expr) =
static: assert(not compiles(e))
type
Container[T] = generic C
Container[T] = concept C
C.len is Ordinal
items(c) is iterator
for value in C:

View File

@@ -40,7 +40,7 @@ proc isSwizzle(s: string): bool {.compileTime.} =
return false
type
StringIsSwizzle = generic value
StringIsSwizzle = concept value
value.isSwizzle
SwizzleStr = static[string] and StringIsSwizzle

View File

@@ -16,10 +16,10 @@ type
TObj = object
x: int
Sortable = generic x, y
Sortable = concept x, y
(x < y) is bool
ObjectContainer = generic C
ObjectContainer = concept C
C.len is Ordinal
for v in items(C):
v.type is tuple|object
@@ -38,7 +38,7 @@ proc intval(x: int): int = 10
# check real and virtual fields
type
TFoo = generic T
TFoo = concept T
T.x
y(T)
intval T.y
@@ -50,7 +50,7 @@ proc testFoo(x: TFoo) = discard
testFoo(TObj(x: 10))
type
Matrix[Rows, Cols: static[int]; T] = generic M
Matrix[Rows, Cols: static[int]; T] = concept M
M.M == Rows
M.N == Cols
M.T is T

View File

@@ -1,5 +1,5 @@
type
hasFieldX = generic z
hasFieldX = concept z
z.x is int
obj_x = object
@@ -7,7 +7,7 @@ type
ref_obj_x = ref object
x: int
ref_to_obj_x = ref obj_x
p_o_x = ptr obj_x

View File

@@ -5,6 +5,7 @@ version 0.10.4
- improve GC-unsafety warnings
- make 'nil' work for 'add' and 'len'
- add "all threads are blocked" detection to 'spawn'
- overloading of '='
version 1.0
@@ -17,8 +18,6 @@ version 1.0
- The bitwise 'not' operator will be renamed to 'bnot' to
prevent 'not 4 == 5' from compiling. -> requires 'mixin' annotation for procs!
- iterators always require a return type
- overloading of '='
- make nimble part of the distribution
- split docgen into separate tool
- special rule for ``[]=``, items, pairs

View File

@@ -41,7 +41,7 @@ News
and not as ``type((x).name)``. Note that this also affects the AST
structure; for immediate macro parameters ``nkCall('addr', 'x')`` is
produced instead of ``nkAddr('x')``.
- ``concept`` is now a keyword and is used instead of ``generic``.
Language Additions