remove en-dash from the language

2026-02-13 14:53:46 +00:00 · 2017-04-02 15:21:10 +02:00
parent a543b89bf8
commit b652b3cd52
6 changed files with 11 additions and 51 deletions
--- a/compiler/idents.nim
+++ b/compiler/idents.nim
@@ -12,7 +12,7 @@
 # id. This module is essential for the compiler's performance.

 import
-  hashes, strutils, etcpriv, wordrecg
+  hashes, strutils, wordrecg

 type
  TIdObj* = object of RootObj
@@ -45,8 +45,6 @@ proc cmpIgnoreStyle(a, b: cstring, blen: int): int =
  while j < blen:
    while a[i] == '_': inc(i)
    while b[j] == '_': inc(j)
-    while isMagicIdentSeparatorRune(a, i): inc(i, magicIdentSeparatorRuneByteWidth)
-    while isMagicIdentSeparatorRune(b, j): inc(j, magicIdentSeparatorRuneByteWidth)
    # tolower inlined:
    var aa = a[i]
    var bb = b[j]
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -17,7 +17,7 @@

 import
  hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream,
-  wordrecg, etcpriv
+  wordrecg

 const
  MaxLineLength* = 80         # lines longer than this lead to a warning
@@ -158,8 +158,6 @@ proc isNimIdentifier*(s: string): bool =
    while i < sLen:
      if s[i] == '_':
        inc(i)
-      elif isMagicIdentSeparatorRune(cstring s, i):
-        inc(i, magicIdentSeparatorRuneByteWidth)
      if s[i] notin SymChars: return
      inc(i)
    result = true
@@ -782,27 +780,17 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
    var c = buf[pos]
    case c
    of 'a'..'z', '0'..'9', '\x80'..'\xFF':
-      if  c == '\226' and
-          buf[pos+1] == '\128' and
-          buf[pos+2] == '\147':  # It's a 'magic separator' en-dash Unicode
-        if buf[pos + magicIdentSeparatorRuneByteWidth] notin SymChars or
-            isMagicIdentSeparatorRune(buf, pos+magicIdentSeparatorRuneByteWidth) or pos == L.bufpos:
-          lexMessage(L, errInvalidToken, "–")
-          break
-        inc(pos, magicIdentSeparatorRuneByteWidth)
-      else:
-        h = h !& ord(c)
-        inc(pos)
+      h = h !& ord(c)
+      inc(pos)
    of 'A'..'Z':
      c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
      h = h !& ord(c)
      inc(pos)
    of '_':
-      if buf[pos+1] notin SymChars or isMagicIdentSeparatorRune(buf, pos+1):
+      if buf[pos+1] notin SymChars:
        lexMessage(L, errInvalidToken, "_")
        break
      inc(pos)
-
    else: break
  tokenEnd(pos-1)
  h = !$h
@@ -1117,8 +1105,7 @@ proc rawGetTok*(L: var TLexer, tok: var TToken) =
      inc(L.bufpos)
    of '_':
      inc(L.bufpos)
-      if L.buf[L.bufpos] notin SymChars+{'_'} and not
-          isMagicIdentSeparatorRune(L.buf, L.bufpos):
+      if L.buf[L.bufpos] notin SymChars+{'_'}:
        tok.tokType = tkSymbol
        tok.ident = L.cache.getIdent("_")
      else:
--- a/doc/manual/lexing.txt
+++ b/doc/manual/lexing.txt
@@ -133,8 +133,7 @@ Two identifiers are considered equal if the following algorithm returns true:
      a.replace(re"_|–", "").toLower == b.replace(re"_|–", "").toLower

 That means only the first letters are compared in a case sensitive manner. Other
-letters are compared case insensitively and underscores and en-dash (Unicode
-point U+2013) are ignored.
+letters are compared case insensitively and underscores are ignored.

 This rather unorthodox way to do identifier comparisons is called
 `partial case insensitivity`:idx: and has some advantages over the conventional
--- a/lib/pure/etcpriv.nim
+++ b/lib/pure/etcpriv.nim
@@ -1,23 +0,0 @@
-#
-#
-#            Nim's Runtime Library
-#        (c) Copyright 2015 Nim Authors
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-## This module contains utils that are less then easy to categorize and
-## don't really warrant a specific module. They are private to compiler
-## and stdlib usage, and should not be used outside of that - they may
-## change or disappear at any time.
-
-
-# Used by pure/hashes.nim, and the compiler parsing
-const magicIdentSeparatorRuneByteWidth* = 3
-
-# Used by pure/hashes.nim, and the compiler parsing
-proc isMagicIdentSeparatorRune*(cs: cstring, i: int): bool  {. inline } =
-  result =  cs[i] == '\226' and
-            cs[i + 1] == '\128' and
-            cs[i + 2] == '\147'     # en-dash  # 145 = nb-hyphen
--- a/lib/pure/hashes.nim
+++ b/lib/pure/hashes.nim
@@ -39,7 +39,7 @@
 ##    result = !$h

 import
-  strutils, etcpriv
+  strutils

 type
  Hash* = int ## a hash value; hash tables using these values should
@@ -163,8 +163,6 @@ proc hashIgnoreStyle*(x: string): Hash =
    var c = x[i]
    if c == '_':
      inc(i)
-    elif isMagicIdentSeparatorRune(cstring(x), i):
-      inc(i, magicIdentSeparatorRuneByteWidth)
    else:
      if c in {'A'..'Z'}:
        c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
@@ -185,8 +183,6 @@ proc hashIgnoreStyle*(sBuf: string, sPos, ePos: int): Hash =
    var c = sBuf[i]
    if c == '_':
      inc(i)
-    elif isMagicIdentSeparatorRune(cstring(sBuf), i):
-      inc(i, magicIdentSeparatorRuneByteWidth)
    else:
      if c in {'A'..'Z'}:
        c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
--- a/web/news/e031_version_0_16_2.rst
+++ b/web/news/e031_version_0_16_2.rst
@@ -45,6 +45,9 @@ Changes affecting backwards compatibility
 - If the dispatcher parameter's value used in multi method is ``nil``,
  a ``NilError`` exception is raised. The old behavior was that the method
  would be a ``nop`` then.
+- In Nim identifiers, the en-dash (Unicode code point U+2013) no longer acts as
+  an underscore alias. Use underscores and fix your programming font instead.
+


 Library Additions