From 49b953533a05c984524daec359dbfb692a692eab Mon Sep 17 00:00:00 2001 From: Oscar Campbell Date: Sun, 31 May 2015 00:56:27 +0200 Subject: [PATCH 1/4] Fix #2523 - first commit. Needs some cleanup. --- compiler/lexer.nim | 302 ++++++++++++++++++++++++++++++--------------- 1 file changed, 204 insertions(+), 98 deletions(-) diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 8080e0e8c3..bdbaa79b1a 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -240,12 +240,43 @@ proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = break if buf[pos] == '_': if buf[pos+1] notin chars: - lexMessage(L, errInvalidToken, "_") + lexMessage(L, errInvalidToken, "_") # TODO/ozra - lift out for better msg break add(tok.literal, '_') inc(pos) L.bufpos = pos +# Used for getting human friendlier err messages. Also only used in getNumber +proc matchAllLiteralishForMessage(L: var TLexer, tok: var TToken, startpos: int) = + # Note: high(int32)): if result.tokType == tkIntLit: result.tokType = tkInt64Lit elif result.tokType in {tkInt8Lit, tkInt16Lit, tkInt32Lit}: - lexMessage(L, errNumberOutOfRange, result.literal) + lexLiteralNumberMessage(L, errNumberOutOfRange, startpos) elif result.tokType == tkInt8Lit and (result.iNumber < int8.low or result.iNumber > int8.high): - lexMessage(L, errNumberOutOfRange, result.literal) + lexLiteralNumberMessage(L, errNumberOutOfRange, startpos) elif result.tokType == tkInt16Lit and (result.iNumber < int16.low or result.iNumber > int16.high): - lexMessage(L, errNumberOutOfRange, result.literal) + lexLiteralNumberMessage(L, errNumberOutOfRange, startpos) + except ValueError: - lexMessage(L, errInvalidNumber, result.literal) + lexLiteralNumberMessage(L, errInvalidNumber, startpos) except OverflowError, RangeError: - lexMessage(L, errNumberOutOfRange, result.literal) - L.bufpos = endpos + lexLiteralNumberMessage(L, errNumberOutOfRange, startpos) + + L.bufpos = postPos proc handleHexChar(L: var TLexer, xi: var int) = case L.buf[L.bufpos] From 6a43b0e81721919d864ce053e0c0c624ba8c0127 Mon Sep 17 00:00:00 2001 From: Oscar Campbell Date: Sun, 31 May 2015 02:55:06 +0200 Subject: [PATCH 2/4] Clean up to compiler style. Refine error-msg for illegal octal 'O' --- compiler/lexer.nim | 188 +++++++++++++++------------------------------ compiler/msgs.nim | 8 +- 2 files changed, 68 insertions(+), 128 deletions(-) diff --git a/compiler/lexer.nim b/compiler/lexer.nim index bdbaa79b1a..e1615ac369 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -229,54 +229,6 @@ proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") = var info = newLineInfo(L.fileIdx, L.lineNumber, pos - L.lineStart) L.dispMessage(info, msg, arg) -proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = - var pos = L.bufpos # use registers for pos, buf - var buf = L.buf - while true: - if buf[pos] in chars: - add(tok.literal, buf[pos]) - inc(pos) - else: - break - if buf[pos] == '_': - if buf[pos+1] notin chars: - lexMessage(L, errInvalidToken, "_") # TODO/ozra - lift out for better msg - break - add(tok.literal, '_') - inc(pos) - L.bufpos = pos - -# Used for getting human friendlier err messages. Also only used in getNumber -proc matchAllLiteralishForMessage(L: var TLexer, tok: var TToken, startpos: int) = - # Note: high(int32)): if result.tokType == tkIntLit: result.tokType = tkInt64Lit elif result.tokType in {tkInt8Lit, tkInt16Lit, tkInt32Lit}: - lexLiteralNumberMessage(L, errNumberOutOfRange, startpos) + lexMessageLitNum(L, errNumberOutOfRange, startpos) elif result.tokType == tkInt8Lit and (result.iNumber < int8.low or result.iNumber > int8.high): - lexLiteralNumberMessage(L, errNumberOutOfRange, startpos) + lexMessageLitNum(L, errNumberOutOfRange, startpos) elif result.tokType == tkInt16Lit and (result.iNumber < int16.low or result.iNumber > int16.high): - lexLiteralNumberMessage(L, errNumberOutOfRange, startpos) - + lexMessageLitNum(L, errNumberOutOfRange, startpos) except ValueError: - lexLiteralNumberMessage(L, errInvalidNumber, startpos) + lexMessageLitNum(L, errInvalidNumber, startpos) except OverflowError, RangeError: - lexLiteralNumberMessage(L, errNumberOutOfRange, startpos) - + lexMessageLitNum(L, errNumberOutOfRange, startpos) L.bufpos = postPos proc handleHexChar(L: var TLexer, xi: var int) = diff --git a/compiler/msgs.nim b/compiler/msgs.nim index 041a181be7..e97032db4f 100644 --- a/compiler/msgs.nim +++ b/compiler/msgs.nim @@ -17,10 +17,9 @@ type errIntLiteralExpected, errInvalidCharacterConstant, errClosingTripleQuoteExpected, errClosingQuoteExpected, errTabulatorsAreNotAllowed, errInvalidToken, errLineTooLong, - errInvalidNumber, errNumberOutOfRange, errNnotAllowedInCharacter, - errClosingBracketExpected, errMissingFinalQuote, errIdentifierExpected, - errNewlineExpected, - errInvalidModuleName, + errInvalidNumber, errInvalidNumberOctalCode, errNumberOutOfRange, + errNnotAllowedInCharacter, errClosingBracketExpected, errMissingFinalQuote, + errIdentifierExpected, errNewlineExpected, errInvalidModuleName, errOperatorExpected, errTokenExpected, errStringAfterIncludeExpected, errRecursiveDependencyX, errOnOrOffExpected, errNoneSpeedOrSizeExpected, errInvalidPragma, errUnknownPragma, errInvalidDirectiveX, @@ -143,6 +142,7 @@ const errInvalidToken: "invalid token: $1", errLineTooLong: "line too long", errInvalidNumber: "$1 is not a valid number", + errInvalidNumberOctalCode: "$1 is not a valid number; did you mean octal? Then use one of '0o', '0c' or '0C'.", errNumberOutOfRange: "number $1 out of valid range", errNnotAllowedInCharacter: "\\n not allowed in character literal", errClosingBracketExpected: "closing ']' expected, but end of file reached", From 4a1e45e3f85c668185971627849e4420c72af767 Mon Sep 17 00:00:00 2001 From: Oscar Campbell Date: Sun, 31 May 2015 03:11:44 +0200 Subject: [PATCH 3/4] Remove forgotten debug echo statement. --- compiler/lexer.nim | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler/lexer.nim b/compiler/lexer.nim index e1615ac369..bc4b46bc92 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -414,7 +414,6 @@ proc getNumber(L: var TLexer): TToken = # Is there still a literalish char awaiting? Then it's an error! if L.buf[postPos] in literalishCharsNoDot or (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}): - echo "likely literal error, additionally: '", L.buf[postPos], L.buf[postPos+1], L.buf[postPos+2], L.buf[postPos+3], "', ", postPos lexMessageLitNum(L, errInvalidNumber, startpos) # Third stage, extract actual number L.bufpos = startpos # restore position From 44b0bf59d9538343178e52346991b527aeb53558 Mon Sep 17 00:00:00 2001 From: Oscar Campbell Date: Tue, 2 Jun 2015 22:22:27 +0200 Subject: [PATCH 4/4] Change const def to cleaner code per suggestion. --- compiler/lexer.nim | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/compiler/lexer.nim b/compiler/lexer.nim index bc4b46bc92..a605183236 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -266,8 +266,7 @@ proc getNumber(L: var TLexer): TToken = xi: BiggestInt const literalishChars = { 'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'c', 'C', 'b', 'B', '_', '.', '\''} - const literalishCharsNoDot = { 'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', - 'c', 'C', 'b', 'B', '_', '\''} + const literalishCharsNoDot = literalishChars - {'.'} proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]) = var pos = L.bufpos # use registers for pos, buf