mirror of
https://github.com/nim-lang/Nim.git
synced 2026-01-05 12:37:46 +00:00
digit-sequence? '.' digit-sequence exponent-part? digit-sequence '.' exponent-part? exponent-part: [eE] [+-]? digit-sequence
788 lines
22 KiB
Nim
788 lines
22 KiB
Nim
#
|
|
#
|
|
# c2nim - C to Nimrod source converter
|
|
# (c) Copyright 2012 Andreas Rumpf
|
|
#
|
|
# See the file "copying.txt", included in this
|
|
# distribution, for details about the copyright.
|
|
#
|
|
|
|
# This module implements an ANSI C scanner. It is an adaptation of
# the scanner module. Keywords are not handled here but in the parser, which
# makes the scanner more flexible.
|
|
|
|
|
|
import
|
|
options, msgs, strutils, platform, nimlexbase, llstream
|
|
|
|
const
  # lines longer than this lead to a warning
  MaxLineLength* = 80
  # decimal digits plus the ASCII letters
  numChars*: TCharSet = {'A'..'Z', 'a'..'z', '0'..'9'}
  # characters that may continue a symbol: letters, digits, '_' and every
  # character from '\x80' upwards
  SymChars*: TCharSet = {'A'..'Z', 'a'..'z', '_', '0'..'9', '\x80'..'\xFF'}
  # characters that may start a symbol: same as SymChars without the digits
  SymStartChars*: TCharSet = {'A'..'Z', 'a'..'z', '_', '\x80'..'\xFF'}
|
|
|
|
type
  TTokKind* = enum
    # NOTE: the order of the members is significant (see the bracket tokens
    # below); do not reorder.
    pxInvalid,
    pxEof,
    pxMacroParam,             # fake token: macro parameter (with its index)
    pxStarComment,            # /* */ comment
    pxLineComment,            # // comment
    pxDirective,              # #define, etc.
    pxDirectiveParLe,         # #define m( with parle (yes, C is that ugly!)
    pxDirConc,                # ##
    pxNewLine,                # newline: end of directive
    pxAmp,                    # &
    pxAmpAmp,                 # &&
    pxAmpAsgn,                # &=
    pxAmpAmpAsgn,             # &&=
    pxBar,                    # |
    pxBarBar,                 # ||
    pxBarAsgn,                # |=
    pxBarBarAsgn,             # ||=
    pxNot,                    # !
    pxPlusPlus,               # ++
    pxMinusMinus,             # --
    pxPlus,                   # +
    pxPlusAsgn,               # +=
    pxMinus,                  # -
    pxMinusAsgn,              # -=
    pxMod,                    # %
    pxModAsgn,                # %=
    pxSlash,                  # /
    pxSlashAsgn,              # /=
    pxStar,                   # *
    pxStarAsgn,               # *=
    pxHat,                    # ^
    pxHatAsgn,                # ^=
    pxAsgn,                   # =
    pxEquals,                 # ==
    pxDot,                    # .
    pxDotDotDot,              # ...
    pxLe,                     # <=
    pxLt,                     # <
    pxGe,                     # >=
    pxGt,                     # >
    pxNeq,                    # !=
    pxConditional,            # ?
    pxShl,                    # <<
    pxShlAsgn,                # <<=
    pxShr,                    # >>
    pxShrAsgn,                # >>=
    pxTilde,                  # ~
    pxTildeAsgn,              # ~=
    pxArrow,                  # ->
    pxScope,                  # ::

    pxStrLit,                 # string literal; text is in TToken.s
    pxCharLit,                # character literal; text is in TToken.s
    pxSymbol,                 # a symbol
    pxIntLit,                 # integer literal
    pxInt64Lit,               # long constant like 0x70fffffff or out of int
                              # range
    pxFloatLit,               # floating point literal
    # the order of the six bracket tokens is important for macro argument
    # parsing: all opening brackets first, then the closing ones
    pxParLe,
    pxBracketLe,
    pxCurlyLe,
    pxParRi,
    pxBracketRi,
    pxCurlyRi,
    pxComma,
    pxSemiColon,
    pxColon,
    pxAngleRi                 # '>' but determined to be the end of a
                              # template's angle bracket
  TTokKinds* = set[TTokKind]
|
|
|
|
type
  TNumericalBase* = enum
    base10, base2, base8, base16

  TToken* = object
    xkind*: TTokKind          # the type of the token
    s*: string                # parsed symbol, char or string literal
    iNumber*: BiggestInt      # the parsed integer literal;
                              # if xkind == pxMacroParam: parameter's position
    fNumber*: BiggestFloat    # the parsed floating point literal
    base*: TNumericalBase     # the numerical base; only valid for int
                              # or float literals
    next*: ref TToken         # for C we need arbitrary look-ahead :-(

  TLexer* = object of TBaseLexer
    fileIdx*: int32           # set by openLexer from the file name
    inDirective: bool         # true while the tokens of a preprocessor
                              # directive line are being scanned
|
|
|
|
# Forward declarations of the exported token API; the bodies follow below.

# reads the next token from `L` into `tok`
proc getTok*(L: var TLexer, tok: var TToken)

# writes the textual form of `tok` to stdout
proc printTok*(tok: TToken)

# textual form of `tok`
proc `$`*(tok: TToken): string
|
|
# implementation
|
|
|
|
var
  # running total; closeLexer adds the closed lexer's line number to it
  gLinesCompiled*: int
|
|
|
|
proc fillToken(L: var TToken) =
  # Resets every scalar/string field of the token `L` to its neutral value:
  # invalid kind, empty text, zero numbers, decimal base. `next` is left
  # untouched.
  L.s = ""
  L.iNumber = 0
  L.fNumber = 0.0
  L.base = base10
  L.xkind = pxInvalid
|
|
|
|
proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) =
  # Initialises the base lexer on `inputstream` and records the file index
  # that `fileInfoIdx` yields for `filename`.
  openBaseLexer(lex, inputstream)
  var idx = fileInfoIdx(filename)
  lex.fileIdx = idx
|
|
|
|
proc closeLexer*(lex: var TLexer) =
  # Adds the lexer's current line number to the global line counter and then
  # closes the underlying base lexer.
  gLinesCompiled = gLinesCompiled + lex.LineNumber
  closeBaseLexer(lex)
|
|
|
|
proc getColumn*(L: TLexer): int =
  # Column number of the current buffer position, as computed by
  # getColNumber.
  return getColNumber(L, L.bufPos)
|
|
|
|
proc getLineInfo*(L: TLexer): TLineInfo =
  # Builds a line info (file index, line number, column) for the current
  # buffer position.
  var col = getColNumber(L, L.bufpos)
  result = newLineInfo(L.fileIdx, L.linenumber, col)
|
|
|
|
proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") =
  # Reports `msg` (with optional argument `arg`) through msgs.GlobalError,
  # located at the lexer's current position.
  var info = getLineInfo(L)
  msgs.GlobalError(info, msg, arg)
|
|
|
|
proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") =
  # Like lexMessage, but the column is derived from the buffer position `pos`
  # relative to the start of the current line.
  msgs.GlobalError(newLineInfo(L.fileIdx, L.linenumber, pos - L.lineStart),
                   msg, arg)
|
|
|
|
proc tokKindToStr*(k: TTokKind): string =
  # Maps a token kind to a printable representation: the operator's spelling
  # for punctuation, a bracketed description for everything else. The case
  # has no `else` so that a new TTokKind member must be handled here.
  case k
  # special tokens
  of pxInvalid: result = "[invalid]"
  of pxEof: result = "[EOF]"
  of pxMacroParam: result = "[macro param]"
  of pxStarComment, pxLineComment: result = "[comment]"
  # preprocessor
  of pxDirective, pxDirectiveParLe: result = "#"
  of pxDirConc: result = "##"
  of pxNewLine: result = "[NewLine]"
  # operators
  of pxAmp: result = "&"
  of pxAmpAmp: result = "&&"
  of pxAmpAsgn: result = "&="
  of pxAmpAmpAsgn: result = "&&="
  of pxBar: result = "|"
  of pxBarBar: result = "||"
  of pxBarAsgn: result = "|="
  of pxBarBarAsgn: result = "||="
  of pxNot: result = "!"
  of pxPlusPlus: result = "++"
  of pxMinusMinus: result = "--"
  of pxPlus: result = "+"
  of pxPlusAsgn: result = "+="
  of pxMinus: result = "-"
  of pxMinusAsgn: result = "-="
  of pxMod: result = "%"
  of pxModAsgn: result = "%="
  of pxSlash: result = "/"
  of pxSlashAsgn: result = "/="
  of pxStar: result = "*"
  of pxStarAsgn: result = "*="
  of pxHat: result = "^"
  of pxHatAsgn: result = "^="
  of pxAsgn: result = "="
  of pxEquals: result = "=="
  of pxDot: result = "."
  of pxDotDotDot: result = "..."
  of pxLe: result = "<="
  of pxLt: result = "<"
  of pxGe: result = ">="
  of pxGt: result = ">"
  of pxNeq: result = "!="
  of pxConditional: result = "?"
  of pxShl: result = "<<"
  of pxShlAsgn: result = "<<="
  of pxShr: result = ">>"
  of pxShrAsgn: result = ">>="
  of pxTilde: result = "~"
  of pxTildeAsgn: result = "~="
  of pxArrow: result = "->"
  of pxScope: result = "::"
  # literals and identifiers
  of pxStrLit: result = "[string literal]"
  of pxCharLit: result = "[char literal]"
  of pxSymbol: result = "[identifier]"
  of pxIntLit, pxInt64Lit: result = "[integer literal]"
  of pxFloatLit: result = "[floating point literal]"
  # brackets and separators
  of pxParLe: result = "("
  of pxBracketLe: result = "["
  of pxCurlyLe: result = "{"
  of pxParRi: result = ")"
  of pxBracketRi: result = "]"
  of pxCurlyRi: result = "}"
  of pxComma: result = ","
  of pxSemiColon: result = ";"
  of pxColon: result = ":"
  of pxAngleRi: result = "> [end of template]"
|
|
|
|
proc `$`(tok: TToken): string =
  # Textual form of a token: the stored text for symbols, comments, string
  # literals and invalid tokens; the number for numeric literals; otherwise
  # the generic representation of the token kind.
  const textKinds = {pxSymbol, pxInvalid, pxStarComment, pxLineComment,
                     pxStrLit}
  if tok.xkind in textKinds:
    result = tok.s
  elif tok.xkind in {pxIntLit, pxInt64Lit}:
    result = $tok.iNumber
  elif tok.xkind == pxFloatLit:
    result = $tok.fNumber
  else:
    result = tokKindToStr(tok.xkind)
|
|
|
|
proc printTok(tok: TToken) =
  # Writes the textual form of `tok`, followed by a newline, to stdout.
  var text = $tok
  writeln(stdout, text)
|
|
|
|
proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: TCharSet) =
  # Consumes a run matching ([chars]_)* at the current position, appending
  # every consumed character (underscores included) to tok.s. At most one
  # underscore is accepted after each character from `chars`.
  var p = L.bufpos
  var b = L.buf
  while b[p] in chars:
    add(tok.s, b[p])
    inc(p)
    if b[p] == '_':
      add(tok.s, '_')
      inc(p)
  L.bufPos = p
|
|
|
|
proc isFloatLiteral(s: string): bool =
  # True iff `s` contains a decimal point or an exponent marker, i.e. the
  # scanned number text has to be parsed as a float rather than an integer.
  result = false
  for c in s:
    if c == '.' or c == 'e' or c == 'E':
      return true
|
|
|
|
proc getNumber2(L: var TLexer, tok: var TToken) =
  # Scans a binary integer literal ("0b..."/"0B...") starting at L.bufpos.
  # Underscores and letters (type suffixes) are skipped; the digits 2..9 and
  # '.' are reported as errInvalidNumber and skipped. The token becomes
  # pxInt64Lit when more than 32 binary digits were read, else pxIntLit.
  tok.base = base2
  var value: BiggestInt = 0
  var digits = 0
  var p = L.bufpos + 2          # step over the "0b" prefix
  while true:
    var c = L.buf[p]
    if c in {'0', '1'}:
      value = (value shl 1) or (ord(c) - ord('0'))
      inc(digits)
    elif c in {'2'..'9', '.'}:
      lexMessage(L, errInvalidNumber)
    elif c notin {'_', 'A'..'Z', 'a'..'z'}:
      break                     # first character that is not part of the number
    inc(p)
  tok.iNumber = value
  if digits > 32:
    tok.xkind = pxInt64Lit
  else:
    tok.xkind = pxIntLit
  L.bufpos = p
|
|
|
|
proc getNumber8(L: var TLexer, tok: var TToken) =
  # Scans an octal integer literal (a leading '0' followed by octal digits)
  # starting at L.bufpos and stores its value in tok.iNumber.
  #
  # Underscores and letters (type suffixes) are skipped; '8', '9' and '.' are
  # reported as errInvalidNumber and skipped.
  #
  # The literal is classified by its value: anything that does not fit into
  # 32 bits (or that overflowed into a negative number) becomes pxInt64Lit.
  # Counting digits is not reliable here because an octal digit carries three
  # bits: a 12-digit literal can be as large as 2^36-1, and leading zeros
  # carry no information at all.
  var pos = L.bufpos + 1        # skip 0
  tok.base = base8
  var xi: BiggestInt = 0
  while true:
    case L.buf[pos]
    of 'A'..'Z', 'a'..'z':
      # ignore type suffix:
      inc(pos)
    of '8'..'9', '.':
      lexMessage(L, errInvalidNumber)
      inc(pos)
    of '_':
      inc(pos)
    of '0'..'7':
      xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
      inc(pos)
    else: break
  tok.iNumber = xi
  if (xi < 0) or (xi > 0xFFFF_FFFF'i64): tok.xkind = pxInt64Lit
  else: tok.xkind = pxIntLit
  L.bufpos = pos
|
|
|
|
proc getNumber16(L: var TLexer, tok: var TToken) =
  # Scans a hexadecimal integer literal ("0x..."/"0X...") starting at
  # L.bufpos. Underscores and the letters g..z/G..Z (type suffixes) are
  # skipped. Every hex digit counts as four bits; the token becomes
  # pxInt64Lit when more than 32 bits were read, else pxIntLit.
  tok.base = base16
  var value: BiggestInt = 0
  var bits = 0
  var p = L.bufpos + 2          # step over the "0x" prefix
  while true:
    var c = L.buf[p]
    var digit = -1              # -1: character carries no digit value
    case c
    of '0'..'9': digit = ord(c) - ord('0')
    of 'a'..'f': digit = ord(c) - ord('a') + 10
    of 'A'..'F': digit = ord(c) - ord('A') + 10
    of '_', 'G'..'Z', 'g'..'z': digit = -1
    else: break
    inc(p)
    if digit >= 0:
      value = (value shl 4) or digit
      inc(bits, 4)
  tok.iNumber = value
  if bits > 32:
    tok.xkind = pxInt64Lit
  else:
    tok.xkind = pxIntLit
  L.bufpos = p
|
|
|
|
proc getFloating(L: var TLexer, tok: var TToken) =
  # Consumes the remainder of a floating point literal: an optional digit
  # run, then an optional exponent part ([eE] [+-]? digits). Everything
  # consumed is appended to tok.s.
  const digits = {'0'..'9'}
  matchUnderscoreChars(L, tok, digits)
  if L.buf[L.bufpos] notin {'e', 'E'}:
    return
  # exponent marker
  add(tok.s, L.buf[L.bufpos])
  inc(L.bufpos)
  # optional sign of the exponent
  if L.buf[L.bufpos] == '+' or L.buf[L.bufpos] == '-':
    add(tok.s, L.buf[L.bufpos])
    inc(L.bufpos)
  matchUnderscoreChars(L, tok, digits)
|
|
|
|
proc getNumber(L: var TLexer, tok: var TToken) =
  # Scans a decimal integer or floating point literal starting at L.bufpos.
  #
  # Accepted shapes:
  #   .digits exponent?        (stored with a leading "0" in tok.s)
  #   digits . digits? exponent?
  #   digits exponent          (e.g. 1e5)
  #   digits
  #
  # The text is collected in tok.s and then converted: literals containing
  # '.', 'e' or 'E' become pxFloatLit (value in tok.fnumber); the others
  # become pxIntLit, or pxInt64Lit when outside the int32 range (value in
  # tok.iNumber). Conversion failures are reported as errInvalidNumber or
  # errNumberOutOfRange. A trailing run of letters (type suffix) is skipped.
  tok.base = base10
  if L.buf[L.bufpos] == '.':
    add(tok.s, "0.")
    inc(L.bufpos)
    getFloating(L, tok)
  else:
    matchUnderscoreChars(L, tok, {'0'..'9'})
    if L.buf[L.bufpos] == '.':
      add(tok.s, '.')
      inc(L.bufpos)
      getFloating(L, tok)
    elif L.buf[L.bufpos] in {'e', 'E'}:
      # An exponent directly after the digits also makes a float ("1e5").
      # Only treat it as one if a digit really follows (optionally after a
      # sign); otherwise the letter is left to the suffix loop below, as
      # before.
      var ahead = L.bufpos + 1
      if L.buf[ahead] in {'+', '-'}: inc(ahead)
      if L.buf[ahead] in {'0'..'9'}:
        # no digits are pending here, so getFloating goes straight to the
        # exponent part
        getFloating(L, tok)
  try:
    if isFloatLiteral(tok.s):
      tok.fnumber = parseFloat(tok.s)
      tok.xkind = pxFloatLit
    else:
      tok.iNumber = parseInt(tok.s)
      if (tok.iNumber < low(int32)) or (tok.iNumber > high(int32)):
        tok.xkind = pxInt64Lit
      else:
        tok.xkind = pxIntLit
  except EInvalidValue:
    lexMessage(L, errInvalidNumber, tok.s)
  except EOverflow:
    lexMessage(L, errNumberOutOfRange, tok.s)
  # ignore type suffix:
  while L.buf[L.bufpos] in {'A'..'Z', 'a'..'z'}: inc(L.bufpos)
|
|
|
|
proc handleCRLF(L: var TLexer, pos: int): int =
  # If the character at `pos` is CR or LF, delegates to the matching base
  # lexer handler and returns the position it yields; otherwise returns `pos`
  # unchanged.
  var c = L.buf[pos]
  if c == CR:
    result = nimlexbase.handleCR(L, pos)
  elif c == LF:
    result = nimlexbase.handleLF(L, pos)
  else:
    result = pos
|
|
|
|
proc escape(L: var TLexer, tok: var TToken, allowEmpty=false) =
  # Decodes one C escape sequence. On entry L.bufpos points at the backslash;
  # on exit it points behind the sequence and the decoded character has been
  # appended to tok.s.
  #
  # Supported: \a \b \f \n \r \t \v \' \" \? \\ , octal \o, \oo, \ooo and
  # hexadecimal \x followed by any number of hex digits. For compatibility
  # the upper-case forms \B \T \N \F \R are accepted as well.
  #
  # Any other character after the backslash is left unconsumed. It is an
  # error (errInvalidCharacterConstant) unless `allowEmpty` is true, which
  # getString uses to permit a bare backslash for line concatenation.
  inc(L.bufpos)                 # skip \
  case L.buf[L.bufpos]
  of 'a':
    add(tok.s, '\a')
    inc(L.bufpos)
  of 'b', 'B':
    add(tok.s, '\b')
    inc(L.bufpos)
  of 't', 'T':
    add(tok.s, '\t')
    inc(L.bufpos)
  of 'n', 'N':
    add(tok.s, '\L')
    inc(L.bufpos)
  of 'v':
    add(tok.s, '\v')
    inc(L.bufpos)
  of 'f', 'F':
    add(tok.s, '\f')
    inc(L.bufpos)
  of 'r', 'R':
    add(tok.s, '\r')
    inc(L.bufpos)
  of '\'':
    add(tok.s, '\'')
    inc(L.bufpos)
  of '"':
    add(tok.s, '"')
    inc(L.bufpos)
  of '?':
    add(tok.s, '?')
    inc(L.bufpos)
  of '\\':
    # an escaped backslash stands for a backslash (this used to append a
    # backspace character)
    add(tok.s, '\\')
    inc(L.bufpos)
  of '0'..'7':
    # up to three octal digits
    var xi = ord(L.buf[L.bufpos]) - ord('0')
    inc(L.bufpos)
    if L.buf[L.bufpos] in {'0'..'7'}:
      xi = (xi shl 3) or (ord(L.buf[L.bufpos]) - ord('0'))
      inc(L.bufpos)
      if L.buf[L.bufpos] in {'0'..'7'}:
        xi = (xi shl 3) or (ord(L.buf[L.bufpos]) - ord('0'))
        inc(L.bufpos)
    add(tok.s, chr(xi))
  of 'x':
    # any number of hex digits
    var xi = 0
    inc(L.bufpos)
    while true:
      case L.buf[L.bufpos]
      of '0'..'9':
        xi = `shl`(xi, 4) or (ord(L.buf[L.bufpos]) - ord('0'))
        inc(L.bufpos)
      of 'a'..'f':
        xi = `shl`(xi, 4) or (ord(L.buf[L.bufpos]) - ord('a') + 10)
        inc(L.bufpos)
      of 'A'..'F':
        xi = `shl`(xi, 4) or (ord(L.buf[L.bufpos]) - ord('A') + 10)
        inc(L.bufpos)
      else:
        break
    add(tok.s, chr(xi))
  elif not allowEmpty:
    lexMessage(L, errInvalidCharacterConstant)
|
|
|
|
proc getCharLit(L: var TLexer, tok: var TToken) =
  # Scans a character literal. L.bufpos points at the opening quote; the
  # (decoded) character is appended to tok.s. A missing closing quote is
  # reported as errMissingFinalQuote.
  inc(L.bufpos)                 # step over the opening '
  if L.buf[L.bufpos] != '\\':
    # plain character
    add(tok.s, L.buf[L.bufpos])
    inc(L.bufpos)
  else:
    escape(L, tok)
  if L.buf[L.bufpos] != '\'':
    lexMessage(L, errMissingFinalQuote)
  else:
    inc(L.bufpos)               # step over the closing '
  tok.xkind = pxCharLit
|
|
|
|
proc getString(L: var TLexer, tok: var TToken) =
  # Scans a string literal. L.bufpos points at the opening quote; the decoded
  # contents are appended to tok.s. Line ends inside the literal are passed
  # to the base lexer handlers and contribute nothing to tok.s. Reaching the
  # end of file first is reported as errClosingQuoteExpected, attributed to
  # the line on which the literal started.
  var startLine = L.linenumber  # remembered for the error message
  var buf = L.buf
  var pos = L.bufPos + 1        # step over the opening "
  while true:
    var c = buf[pos]
    if c == '\"':
      inc(pos)
      break
    elif c == CR:
      pos = nimlexbase.HandleCR(L, pos)
      buf = L.buf
    elif c == LF:
      pos = nimlexbase.HandleLF(L, pos)
      buf = L.buf
    elif c == nimlexbase.EndOfFile:
      # report against the starting line, then restore the real line number
      var currentLine = L.linenumber
      L.LineNumber = startLine
      lexMessagePos(L, errClosingQuoteExpected, L.lineStart)
      L.LineNumber = currentLine
      break
    elif c == '\\':
      # an empty \ is allowed (but not required) for line concatenation
      L.bufpos = pos
      escape(L, tok, allowEmpty=true)
      pos = L.bufpos
    else:
      add(tok.s, c)
      inc(pos)
  L.bufpos = pos
  tok.xkind = pxStrLit
|
|
|
|
proc getSymbol(L: var TLexer, tok: var TToken) =
  # Appends the longest run of SymChars at the current position to tok.s and
  # marks the token as pxSymbol.
  var p = L.bufpos
  var b = L.buf
  while b[p] in SymChars:
    add(tok.s, b[p])
    inc(p)
  L.bufpos = p
  tok.xkind = pxSymbol
|
|
|
|
proc scanLineComment(L: var TLexer, tok: var TToken) =
  # Scans one or more consecutive // comments into a single pxLineComment
  # token. Each source line contributes "#" plus its text after the "//";
  # lines are joined with "\n". The comment continues on the next line only
  # if that line, after nothing but spaces, starts with // in the same
  # column as the first one.
  var pos = L.bufpos
  var buf = L.buf
  tok.xkind = pxLineComment
  var startCol = getColNumber(L, pos)
  while true:
    inc(pos, 2)                 # step over the //
    add(tok.s, '#')
    while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
      add(tok.s, buf[pos])
      inc(pos)
    pos = handleCRLF(L, pos)
    buf = L.buf
    # measure the indentation (spaces only) of the following line
    var spaces = 0
    while buf[pos] == ' ':
      inc(pos)
      inc(spaces)
    if (startCol != spaces) or (buf[pos] != '/') or (buf[pos + 1] != '/'):
      break
    add(tok.s, "\n")
  L.bufpos = pos
|
|
|
|
proc scanStarComment(L: var TLexer, tok: var TToken) =
  # Scans the body of a /* */ comment into a pxStarComment token. L.bufpos
  # must point just behind the opening "/*". tok.s receives "#" followed by
  # the comment text; every line end becomes "\n#", leading blanks of a line
  # become spaces and a single leading '*' of a line is dropped.
  #
  # Reaching the end of file before "*/" is reported via errTokenExpected and
  # ends the scan.
  var pos = L.bufpos
  var buf = L.buf
  tok.s = "#"
  tok.xkind = pxStarComment
  while true:
    case buf[pos]
    of CR, LF:
      pos = handleCRLF(L, pos)
      buf = L.buf
      add(tok.s, "\n#")
      # skip annoying stars as line prefix: (eg.
      # /*
      #  * ugly comment <-- this star
      #  */
      while buf[pos] in {' ', '\t'}:
        add(tok.s, ' ')
        inc(pos)
      if buf[pos] == '*' and buf[pos+1] != '/': inc(pos)
    of '*':
      inc(pos)
      if buf[pos] == '/':
        inc(pos)
        break
      else:
        add(tok.s, '*')
    of nimlexbase.EndOfFile:
      lexMessage(L, errTokenExpected, "*/")
      # `pos` is not advanced here, so leave the loop explicitly instead of
      # relying on lexMessage not returning (getString does the same)
      break
    else:
      add(tok.s, buf[pos])
      inc(pos)
  L.bufpos = pos
|
|
|
|
proc skip(L: var TLexer, tok: var TToken) =
  # Skips blanks, tabs, line ends and backslashes in front of the next token.
  # When a line end is crossed while L.inDirective is set, tok.xkind is set
  # to pxNewLine and the directive mode is switched off. Any other character
  # (EndOfFile included) ends the skipping.
  var pos = L.bufpos
  var buf = L.buf
  while true:
    var c = buf[pos]
    if (c == ' ') or (c == Tabulator):
      inc(pos)
    elif (c == CR) or (c == LF):
      # newline is special: it terminates a directive
      pos = handleCRLF(L, pos)
      buf = L.buf
      if L.inDirective:
        tok.xkind = pxNewLine
        L.inDirective = false
    elif c == '\\':
      # a backslash is simply dropped; inside a directive it additionally
      # swallows trailing blanks and the line end (line continuation)
      inc(pos)
      if L.inDirective:
        while buf[pos] in {' ', '\t'}: inc(pos)
        if buf[pos] in {CR, LF}:
          pos = handleCRLF(L, pos)
          buf = L.buf
    else:
      break
  L.bufpos = pos
|
|
|
|
proc getDirective(L: var TLexer, tok: var TToken) =
  # Scans a preprocessor directive. L.bufpos points at the '#'; the directive
  # name is stored in tok.s and directive mode is switched on.
  #
  # To tell `#define x(...)` from `#define x (...)` the scanner peeks behind
  # the following symbol without consuming it: if a '(' follows that symbol
  # immediately, the token kind is pxDirectiveParLe, otherwise pxDirective.
  const blanks = {' ', '\t'}
  var buf = L.buf
  var pos = L.bufpos + 1        # step over the '#'
  while buf[pos] in blanks: inc(pos)
  # the directive's name
  while buf[pos] in SymChars:
    add(tok.s, buf[pos])
    inc(pos)
  L.bufpos = pos                # only the name is consumed
  # pure look-ahead from here on
  var ahead = pos
  while buf[ahead] in blanks: inc(ahead)
  while buf[ahead] in SymChars: inc(ahead)
  if buf[ahead] != '(':
    tok.xkind = pxDirective
  else:
    tok.xkind = pxDirectiveParLe
  L.inDirective = true
|
|
|
|
proc getTok(L: var TLexer, tok: var TToken) =
  # Reads the next token from `L` into `tok`.
  #
  # The token is reset first, then whitespace is skipped. If skipping crossed
  # the end of a directive line, the pxNewLine token produced by `skip` is
  # returned as is. Otherwise the first character selects the scanner:
  # symbols, numbers, comments, directives, string/char literals or one of
  # the operator/punctuation tokens (longest match). An unknown character
  # yields pxInvalid and is reported as errInvalidToken.
  #
  # Numbers: "0x"/"0X" is hexadecimal, "0b"/"0B" binary, and a '0' followed
  # by any octal digit is octal. That includes a further '0', so "0010" is 8
  # and not decimal 10. The flip side is that a float written with two
  # leading zeros ("00.5") is now rejected by the octal scanner, exactly as
  # "01.5" already was.
  fillToken(tok)                # also resets tok.xkind to pxInvalid
  skip(L, tok)
  if tok.xkind == pxNewLine: return
  var c = L.buf[L.bufpos]
  if c in SymStartChars:
    getSymbol(L, tok)
  elif c == '0':
    case L.buf[L.bufpos+1]
    of 'x', 'X': getNumber16(L, tok)
    of 'b', 'B': getNumber2(L, tok)
    of '0'..'7': getNumber8(L, tok)
    else: getNumber(L, tok)
  elif c in {'1'..'9'} or (c == '.' and L.buf[L.bufpos+1] in {'0'..'9'}):
    getNumber(L, tok)
  else:
    case c
    of ';':
      tok.xkind = pxSemiColon
      inc(L.bufpos)
    of '/':
      if L.buf[L.bufpos + 1] == '/':
        scanLineComment(L, tok)
      elif L.buf[L.bufpos+1] == '*':
        inc(L.bufpos, 2)
        scanStarComment(L, tok)
      elif L.buf[L.bufpos+1] == '=':
        inc(L.bufpos, 2)
        tok.xkind = pxSlashAsgn
      else:
        tok.xkind = pxSlash
        inc(L.bufpos)
    of ',':
      tok.xkind = pxComma
      inc(L.bufpos)
    of '(':
      inc(L.bufpos)
      tok.xkind = pxParLe
    of '*':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        inc(L.bufpos)
        tok.xkind = pxStarAsgn
      else:
        tok.xkind = pxStar
    of ')':
      inc(L.bufpos)
      tok.xkind = pxParRi
    of '[':
      inc(L.bufpos)
      tok.xkind = pxBracketLe
    of ']':
      inc(L.bufpos)
      tok.xkind = pxBracketRi
    of '.':
      # a '.' followed by a digit was already handled as a number above
      inc(L.bufpos)
      if L.buf[L.bufpos] == '.' and L.buf[L.bufpos+1] == '.':
        tok.xkind = pxDotDotDot
        inc(L.bufpos, 2)
      else:
        tok.xkind = pxDot
    of '{':
      inc(L.bufpos)
      tok.xkind = pxCurlyLe
    of '}':
      inc(L.bufpos)
      tok.xkind = pxCurlyRi
    of '+':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        tok.xkind = pxPlusAsgn
        inc(L.bufpos)
      elif L.buf[L.bufpos] == '+':
        tok.xkind = pxPlusPlus
        inc(L.bufpos)
      else:
        tok.xkind = pxPlus
    of '-':
      inc(L.bufpos)
      case L.buf[L.bufpos]
      of '>':
        tok.xkind = pxArrow
        inc(L.bufpos)
      of '=':
        tok.xkind = pxMinusAsgn
        inc(L.bufpos)
      of '-':
        tok.xkind = pxMinusMinus
        inc(L.bufpos)
      else:
        tok.xkind = pxMinus
    of '?':
      inc(L.bufpos)
      tok.xkind = pxConditional
    of ':':
      inc(L.bufpos)
      if L.buf[L.bufpos] == ':':
        tok.xkind = pxScope
        inc(L.bufpos)
      else:
        tok.xkind = pxColon
    of '!':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        tok.xkind = pxNeq
        inc(L.bufpos)
      else:
        tok.xkind = pxNot
    of '<':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        inc(L.bufpos)
        tok.xkind = pxLe
      elif L.buf[L.bufpos] == '<':
        inc(L.bufpos)
        if L.buf[L.bufpos] == '=':
          inc(L.bufpos)
          tok.xkind = pxShlAsgn
        else:
          tok.xkind = pxShl
      else:
        tok.xkind = pxLt
    of '>':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        inc(L.bufpos)
        tok.xkind = pxGe
      elif L.buf[L.bufpos] == '>':
        inc(L.bufpos)
        if L.buf[L.bufpos] == '=':
          inc(L.bufpos)
          tok.xkind = pxShrAsgn
        else:
          tok.xkind = pxShr
      else:
        tok.xkind = pxGt
    of '=':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        tok.xkind = pxEquals
        inc(L.bufpos)
      else:
        tok.xkind = pxAsgn
    of '&':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        tok.xkind = pxAmpAsgn
        inc(L.bufpos)
      elif L.buf[L.bufpos] == '&':
        inc(L.bufpos)
        if L.buf[L.bufpos] == '=':
          inc(L.bufpos)
          tok.xkind = pxAmpAmpAsgn
        else:
          tok.xkind = pxAmpAmp
      else:
        tok.xkind = pxAmp
    of '|':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        tok.xkind = pxBarAsgn
        inc(L.bufpos)
      elif L.buf[L.bufpos] == '|':
        inc(L.bufpos)
        if L.buf[L.bufpos] == '=':
          inc(L.bufpos)
          tok.xkind = pxBarBarAsgn
        else:
          tok.xkind = pxBarBar
      else:
        tok.xkind = pxBar
    of '^':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        tok.xkind = pxHatAsgn
        inc(L.bufpos)
      else:
        tok.xkind = pxHat
    of '%':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        tok.xkind = pxModAsgn
        inc(L.bufpos)
      else:
        tok.xkind = pxMod
    of '~':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        tok.xkind = pxTildeAsgn
        inc(L.bufpos)
      else:
        tok.xkind = pxTilde
    of '#':
      if L.buf[L.bufpos+1] == '#':
        inc(L.bufpos, 2)
        tok.xkind = pxDirConc
      else:
        getDirective(L, tok)
    of '"': getString(L, tok)
    of '\'': getCharLit(L, tok)
    of nimlexbase.EndOfFile:
      # the position is not advanced: further calls keep returning pxEof
      tok.xkind = pxEof
    else:
      tok.s = $c
      tok.xkind = pxInvalid
      lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
      inc(L.bufpos)
|