mirror of
https://github.com/nim-lang/Nim.git
synced 2026-01-09 14:32:53 +00:00
implemented Unicode operators (#18789)
* implemented Unicode operators; refs https://github.com/nim-lang/RFCs/issues/388 * bugfix * better test * arguably more elegant implementation * Update changelog.md Co-authored-by: konsumlamm <44230978+konsumlamm@users.noreply.github.com> Co-authored-by: konsumlamm <44230978+konsumlamm@users.noreply.github.com>
This commit is contained in:
@@ -445,6 +445,13 @@ proc mysort(s: seq; cmp: proc(a, b: T): int) {.effectsOf: cmp.}
|
||||
To enable the new effect system, use --experimental:strictEffects.
|
||||
|
||||
|
||||
- Nim now supports a small subset of Unicode operators as operator symbols.
|
||||
The supported symbols are: "∙ ∘ × ★ ⊗ ⊘ ⊙ ⊛ ⊠ ⊡ ∩ ∧ ⊓ ± ⊕ ⊖ ⊞ ⊟ ∪ ∨ ⊔".
|
||||
To enable this feature, use `--experimental:unicodeOperators`. Note that due
|
||||
to parser limitations you **cannot** enable this feature via a
|
||||
pragma `{.experimental: "unicodeOperators".}` reliably.
|
||||
|
||||
|
||||
## Compiler changes
|
||||
|
||||
- Added `--declaredLocs` to show symbol declaration location in messages.
|
||||
|
||||
@@ -882,15 +882,66 @@ proc endOperator(L: var Lexer, tok: var Token, pos: int,
|
||||
else: tok.tokType = TokType(tok.ident.id - oprLow + ord(tkColon))
|
||||
L.bufpos = pos
|
||||
|
||||
const
  # Lead bytes of the UTF-8 encodings of the supported Unicode operators
  # ("∙ ∘ × ★ ⊗ ⊘ ⊙ ⊛ ⊠ ⊡ ∩ ∧ ⊓ ± ⊕ ⊖ ⊞ ⊟ ∪ ∨ ⊔"): each of them begins
  # with 0xC2, 0xC3 or 0xE2.
  UnicodeOperatorStartChars = {'\xC2', '\xC3', '\xE2'}
|
||||
|
||||
type
  UnicodeOprPred = enum
    # Precedence class assigned to a Unicode operator.
    Mul, # same precedence as `*`
    Add  # same precedence as `+`
|
||||
|
||||
proc unicodeOprLen(buf: cstring; pos: int): (int8, UnicodeOprPred) =
  ## Checks whether the bytes starting at `buf[pos]` spell one of the
  ## supported Unicode operators.
  ##
  ## Returns the byte length of the operator's encoding (2 or 3) together
  ## with its precedence class. When no supported operator starts at `pos`
  ## the result is `(0, Mul)`; only the zero length is meaningful then.
  result = (0'i8, Mul)
  case buf[pos]
  of '\226':
    # Three-byte operators; the second byte selects the group.
    case buf[pos+1]
    of '\136':
      case buf[pos+2]
      of '\152', '\153', '\167', '\169':
        result = (3'i8, Mul) # ∘ ∙ ∧ ∩
      of '\168', '\170':
        result = (3'i8, Add) # ∨ ∪
      else:
        discard
    of '\138':
      case buf[pos+2]
      of '\147', '\151', '\152', '\153', '\155', '\160', '\161':
        result = (3'i8, Mul) # ⊓ ⊗ ⊘ ⊙ ⊛ ⊠ ⊡
      of '\148', '\149', '\150', '\158', '\159':
        result = (3'i8, Add) # ⊔ ⊕ ⊖ ⊞ ⊟
      else:
        discard
    of '\152':
      if buf[pos+2] == '\133':
        result = (3'i8, Mul) # ★
    else:
      discard
  of '\194':
    # Two-byte operator.
    if buf[pos+1] == '\177':
      result = (2'i8, Add) # ±
  of '\195':
    # Two-byte operator.
    if buf[pos+1] == '\151':
      result = (2'i8, Mul) # ×
  else:
    discard
|
||||
|
||||
proc getOperator(L: var Lexer, tok: var Token) =
|
||||
var pos = L.bufpos
|
||||
tokenBegin(tok, pos)
|
||||
var h: Hash = 0
|
||||
while true:
|
||||
var c = L.buf[pos]
|
||||
if c notin OpChars: break
|
||||
h = h !& ord(c)
|
||||
inc(pos)
|
||||
if c in OpChars:
|
||||
h = h !& ord(c)
|
||||
inc(pos)
|
||||
elif c in UnicodeOperatorStartChars and unicodeOperators in L.config.features:
|
||||
let oprLen = unicodeOprLen(L.buf, pos)[0]
|
||||
if oprLen == 0: break
|
||||
for i in 0..<oprLen:
|
||||
h = h !& ord(L.buf[pos])
|
||||
inc pos
|
||||
else:
|
||||
break
|
||||
endOperator(L, tok, pos, h)
|
||||
tokenEnd(tok, pos-1)
|
||||
# advance pos but don't store it in L.bufpos so the next token (which might
|
||||
@@ -904,6 +955,9 @@ proc getOperator(L: var Lexer, tok: var Token) =
|
||||
|
||||
proc getPrecedence*(tok: Token): int =
|
||||
## Calculates the precedence of the given token.
|
||||
const
|
||||
MulPred = 9
|
||||
PlusPred = 8
|
||||
case tok.tokType
|
||||
of tkOpr:
|
||||
let relevantChar = tok.ident.s[0]
|
||||
@@ -917,13 +971,22 @@ proc getPrecedence*(tok: Token): int =
|
||||
|
||||
case relevantChar
|
||||
of '$', '^': considerAsgn(10)
|
||||
of '*', '%', '/', '\\': considerAsgn(9)
|
||||
of '*', '%', '/', '\\': considerAsgn(MulPred)
|
||||
of '~': result = 8
|
||||
of '+', '-', '|': considerAsgn(8)
|
||||
of '+', '-', '|': considerAsgn(PlusPred)
|
||||
of '&': considerAsgn(7)
|
||||
of '=', '<', '>', '!': result = 5
|
||||
of '.': considerAsgn(6)
|
||||
of '?': result = 2
|
||||
of UnicodeOperatorStartChars:
|
||||
if tok.ident.s[^1] == '=':
|
||||
result = 1
|
||||
else:
|
||||
let (len, pred) = unicodeOprLen(cstring(tok.ident.s), 0)
|
||||
if len != 0:
|
||||
result = if pred == Mul: MulPred else: PlusPred
|
||||
else:
|
||||
result = 2
|
||||
else: considerAsgn(2)
|
||||
of tkDiv, tkMod, tkShl, tkShr: result = 9
|
||||
of tkDotDot: result = 6
|
||||
@@ -1167,10 +1230,15 @@ proc rawGetTok*(L: var Lexer, tok: var Token) =
|
||||
var c = L.buf[L.bufpos]
|
||||
tok.line = L.lineNumber
|
||||
tok.col = getColNumber(L, L.bufpos)
|
||||
if c in SymStartChars - {'r', 'R'}:
|
||||
if c in SymStartChars - {'r', 'R'} - UnicodeOperatorStartChars:
|
||||
getSymbol(L, tok)
|
||||
else:
|
||||
case c
|
||||
of UnicodeOperatorStartChars:
|
||||
if unicodeOperators in L.config.features and unicodeOprLen(L.buf, L.bufpos)[0] != 0:
|
||||
getOperator(L, tok)
|
||||
else:
|
||||
getSymbol(L, tok)
|
||||
of '#':
|
||||
scanComment(L, tok)
|
||||
of '*':
|
||||
|
||||
@@ -205,7 +205,8 @@ type
|
||||
views,
|
||||
strictNotNil,
|
||||
overloadableEnums,
|
||||
strictEffects
|
||||
strictEffects,
|
||||
unicodeOperators
|
||||
|
||||
LegacyFeature* = enum
|
||||
allowSemcheckedAstModification,
|
||||
|
||||
@@ -700,6 +700,25 @@ contain a dot: `{..}` are the three tokens `{`:tok:, `..`:tok:, `}`:tok:
|
||||
and not the two tokens `{.`:tok:, `.}`:tok:.
|
||||
|
||||
|
||||
Unicode Operators
|
||||
-----------------
|
||||
|
||||
Under the `--experimental:unicodeOperators` switch these Unicode operators are
|
||||
also parsed as operators::
|
||||
|
||||
∙ ∘ × ★ ⊗ ⊘ ⊙ ⊛ ⊠ ⊡ ∩ ∧ ⊓ # same priority as * (multiplication)
|
||||
± ⊕ ⊖ ⊞ ⊟ ∪ ∨ ⊔ # same priority as + (addition)
|
||||
|
||||
|
||||
If enabled, Unicode operators can be combined with non-Unicode operator
|
||||
symbols. The usual precedence extensions then apply, for example, `⊠=` is an
|
||||
assignment-like operator just like `*=` is.
|
||||
|
||||
No Unicode normalization step is performed.
|
||||
|
||||
**Note**: Due to parser limitations one **cannot** enable this feature via a
|
||||
pragma `{.experimental: "unicodeOperators".}` reliably.
|
||||
|
||||
|
||||
Syntax
|
||||
======
|
||||
|
||||
1
tests/lexer/nim.cfg
Normal file
1
tests/lexer/nim.cfg
Normal file
@@ -0,0 +1 @@
|
||||
--experimental:unicodeOperators
|
||||
14
tests/lexer/tunicode_operators.nim
Normal file
14
tests/lexer/tunicode_operators.nim
Normal file
@@ -0,0 +1,14 @@
|
||||
# Unicode operators are enabled for this directory by tests/lexer/nim.cfg
# (`--experimental:unicodeOperators`); the pragma form is left commented out.
#{.experimental: "unicodeOperators".}

proc `⊙`(x, y: int): int = x * y
proc `⊙=`(x: var int, y: int) = x *= y

proc `⊞++`(x, y: int): int = x + y

# `⊙=` is assignment-like, `⊞++` binds like `+` and `⊙` binds like `*`, so
# the expression below must group exactly like `y *= 9 + 4 * 3`.
var x = 45
x ⊙= 9 ⊞++ 4 ⊙ 3

var y = 45
y *= 9 + 4 * 3

# doAssert, unlike assert, is never compiled out, so the check always runs.
doAssert x == y
|
||||
Reference in New Issue
Block a user