mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-29 17:34:43 +00:00
changed handling of generalized string literals
This commit is contained in:
@@ -29,15 +29,18 @@ symbol ::= '`' (KEYWORD | IDENT | operator | '(' ')'
|
||||
| IDENT
|
||||
|
||||
primaryPrefix ::= (prefixOperator | 'bind') optInd
|
||||
primarySuffix ::= '.' optInd symbol
|
||||
primarySuffix ::= '.' optInd symbol [generalizedLit]
|
||||
| '(' optInd namedExprList optPar ')'
|
||||
| '[' optInd [indexExpr (comma indexExpr)* [comma]] optPar ']'
|
||||
| '^'
|
||||
| pragma
|
||||
|
||||
primary ::= primaryPrefix* (symbol | constructor | castExpr | addrExpr)
|
||||
primary ::= primaryPrefix* (symbol [generalizedLit] |
|
||||
constructor | castExpr | addrExpr)
|
||||
primarySuffix*
|
||||
|
||||
generalizedLit ::= GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
|
||||
|
||||
literal ::= INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT
|
||||
| FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
|
||||
| STR_LIT | RSTR_LIT | TRIPLESTR_LIT
|
||||
|
||||
@@ -178,6 +178,8 @@ the exact spelling of an identifier.
|
||||
String literals
|
||||
---------------
|
||||
|
||||
Terminal symbol in the grammar: ``STR_LIT``.
|
||||
|
||||
`String literals`:idx: can be delimited by matching double quotes, and can
|
||||
contain the following `escape sequences`:idx:\ :
|
||||
|
||||
@@ -205,12 +207,14 @@ contain the following `escape sequences`:idx:\ :
|
||||
|
||||
|
||||
Strings in Nimrod may contain any 8-bit value, even embedded zeros. However
|
||||
some operations may interpret the first binary zero as terminator.
|
||||
some operations may interpret the first binary zero as a terminator.
|
||||
|
||||
|
||||
Triple quoted string literals
|
||||
-----------------------------
|
||||
|
||||
Terminal symbol in the grammar: ``TRIPLESTR_LIT``.
|
||||
|
||||
String literals can also be delimited by three double quotes
|
||||
``"""`` ... ``"""``.
|
||||
Literals in this form may run for several lines, may contain ``"`` and do not
|
||||
@@ -230,10 +234,12 @@ Produces::
|
||||
Raw string literals
|
||||
-------------------
|
||||
|
||||
There are also `raw string literals` that are preceded with the letter ``r``
|
||||
(or ``R``) and are delimited by matching double quotes (just like ordinary
|
||||
string literals) and do not interpret the escape sequences. This is especially
|
||||
convenient for regular expressions or Windows paths:
|
||||
Terminal symbol in the grammar: ``RSTR_LIT``.
|
||||
|
||||
There are also `raw string literals`:idx: that are preceded with the
|
||||
letter ``r`` (or ``R``) and are delimited by matching double quotes (just
|
||||
like ordinary string literals) and do not interpret the escape sequences.
|
||||
This is especially convenient for regular expressions or Windows paths:
|
||||
|
||||
.. code-block:: nimrod
|
||||
|
||||
@@ -250,12 +256,17 @@ Produces::
|
||||
a"b
|
||||
|
||||
``r""""`` is not possible with this notation, because the three leading
|
||||
quotes introduce a triple quoted string literal.
|
||||
quotes introduce a triple quoted string literal. ``r"""`` is the same
|
||||
as ``"""`` since triple quoted string literals do not interpret escape
|
||||
sequences either.
|
||||
|
||||
|
||||
Generalized raw string literals
|
||||
-------------------------------
|
||||
|
||||
Terminal symbols in the grammar: ``GENERALIZED_STR_LIT``,
|
||||
``GENERALIZED_TRIPLESTR_LIT``.
|
||||
|
||||
The construct ``identifier"string literal"`` (without whitespace between the
|
||||
identifier and the opening quotation mark) is a
|
||||
`generalized raw string literal`:idx:. It is a shortcut for the construct
|
||||
|
||||
@@ -128,7 +128,7 @@ proc addImplicitMod(filename: string) =
|
||||
gImplicitMods[length] = filename
|
||||
|
||||
proc getPrefixDir(): string =
|
||||
result = SplitPath(getApplicationDir()).head
|
||||
result = SplitPath(getAppDir()).head
|
||||
|
||||
proc shortenDir(dir: string): string =
|
||||
# returns the interesting part of a dir
|
||||
|
||||
@@ -163,12 +163,28 @@ proc parseAddr(p: var TParser): PNode =
|
||||
addSon(result, parseExpr(p))
|
||||
optPar(p)
|
||||
eat(p, tkParRi)
|
||||
|
||||
proc parseGStrLit(p: var TParser, a: PNode): PNode =
|
||||
case p.tok.tokType
|
||||
of tkGStrLit:
|
||||
result = newNodeP(nkCallStrLit, p)
|
||||
addSon(result, a)
|
||||
addSon(result, newStrNodeP(nkRStrLit, p.tok.literal, p))
|
||||
getTok(p)
|
||||
of tkGTripleStrLit:
|
||||
result = newNodeP(nkCallStrLit, p)
|
||||
addSon(result, a)
|
||||
addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p))
|
||||
getTok(p)
|
||||
else:
|
||||
result = a
|
||||
|
||||
proc identOrLiteral(p: var TParser): PNode =
|
||||
case p.tok.tokType
|
||||
of tkSymbol:
|
||||
result = newIdentNodeP(p.tok.ident, p)
|
||||
getTok(p)
|
||||
result = parseGStrLit(p, result)
|
||||
of tkAccent:
|
||||
result = accExpr(p) # literals
|
||||
of tkIntLit:
|
||||
@@ -212,16 +228,6 @@ proc identOrLiteral(p: var TParser): PNode =
|
||||
of tkTripleStrLit:
|
||||
result = newStrNodeP(nkTripleStrLit, p.tok.literal, p)
|
||||
getTok(p)
|
||||
of tkCallRStrLit:
|
||||
result = newNodeP(nkCallStrLit, p)
|
||||
addSon(result, newIdentNodeP(p.tok.ident, p))
|
||||
addSon(result, newStrNodeP(nkRStrLit, p.tok.literal, p))
|
||||
getTok(p)
|
||||
of tkCallTripleStrLit:
|
||||
result = newNodeP(nkCallStrLit, p)
|
||||
addSon(result, newIdentNodeP(p.tok.ident, p))
|
||||
addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p))
|
||||
getTok(p)
|
||||
of tkCharLit:
|
||||
result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p)
|
||||
getTok(p)
|
||||
@@ -279,6 +285,7 @@ proc primary(p: var TParser): PNode =
|
||||
getTok(p) # skip '.'
|
||||
optInd(p, result)
|
||||
addSon(result, parseSymbol(p))
|
||||
result = parseGStrLit(p, result)
|
||||
of tkHat:
|
||||
a = result
|
||||
result = newNodeP(nkDerefExpr, p)
|
||||
|
||||
@@ -394,11 +394,27 @@ proc setBaseFlags(n: PNode, base: TNumericalBase) =
|
||||
of base8: incl(n.flags, nfBase8)
|
||||
of base16: incl(n.flags, nfBase16)
|
||||
|
||||
proc parseGStrLit(p: var TParser, a: PNode): PNode =
|
||||
case p.tok.tokType
|
||||
of tkGStrLit:
|
||||
result = newNodeP(nkCallStrLit, p)
|
||||
addSon(result, a)
|
||||
addSon(result, newStrNodeP(nkRStrLit, p.tok.literal, p))
|
||||
getTok(p)
|
||||
of tkGTripleStrLit:
|
||||
result = newNodeP(nkCallStrLit, p)
|
||||
addSon(result, a)
|
||||
addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p))
|
||||
getTok(p)
|
||||
else:
|
||||
result = a
|
||||
|
||||
proc identOrLiteral(p: var TParser): PNode =
|
||||
case p.tok.tokType
|
||||
of tkSymbol:
|
||||
result = newIdentNodeP(p.tok.ident, p)
|
||||
getTok(p)
|
||||
result = parseGStrLit(p, result)
|
||||
of tkAccent:
|
||||
result = accExpr(p) # literals
|
||||
of tkIntLit:
|
||||
@@ -442,16 +458,6 @@ proc identOrLiteral(p: var TParser): PNode =
|
||||
of tkTripleStrLit:
|
||||
result = newStrNodeP(nkTripleStrLit, p.tok.literal, p)
|
||||
getTok(p)
|
||||
of tkCallRStrLit:
|
||||
result = newNodeP(nkCallStrLit, p)
|
||||
addSon(result, newIdentNodeP(p.tok.ident, p))
|
||||
addSon(result, newStrNodeP(nkRStrLit, p.tok.literal, p))
|
||||
getTok(p)
|
||||
of tkCallTripleStrLit:
|
||||
result = newNodeP(nkCallStrLit, p)
|
||||
addSon(result, newIdentNodeP(p.tok.ident, p))
|
||||
addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p))
|
||||
getTok(p)
|
||||
of tkCharLit:
|
||||
result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p)
|
||||
getTok(p)
|
||||
@@ -509,6 +515,7 @@ proc primary(p: var TParser): PNode =
|
||||
getTok(p) # skip '.'
|
||||
optInd(p, result)
|
||||
addSon(result, parseSymbol(p))
|
||||
result = parseGStrLit(p, result)
|
||||
of tkHat:
|
||||
a = result
|
||||
result = newNodeP(nkDerefExpr, p)
|
||||
|
||||
@@ -684,7 +684,7 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
|
||||
if n.sons[1].kind == nkRStrLit:
|
||||
put(g, tkRStrLit, '\"' & replace(n[1].strVal, "\"", "\"\"") & '\"')
|
||||
else:
|
||||
gsub(g, n.sons[0])
|
||||
gsub(g, n.sons[1])
|
||||
of nkHiddenStdConv, nkHiddenSubConv, nkHiddenCallConv: gsub(g, n.sons[0])
|
||||
of nkCast:
|
||||
put(g, tkCast, "cast")
|
||||
|
||||
@@ -58,7 +58,7 @@ type
|
||||
tkYield, #[[[end]]]
|
||||
tkIntLit, tkInt8Lit, tkInt16Lit, tkInt32Lit, tkInt64Lit, tkFloatLit,
|
||||
tkFloat32Lit, tkFloat64Lit, tkStrLit, tkRStrLit, tkTripleStrLit,
|
||||
tkCallRStrLit, tkCallTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe,
|
||||
tkGStrLit, tkGTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe,
|
||||
tkBracketRi, tkCurlyLe, tkCurlyRi,
|
||||
tkBracketDotLe, tkBracketDotRi, # [. and .]
|
||||
tkCurlyDotLe, tkCurlyDotRi, # {. and .}
|
||||
@@ -91,7 +91,7 @@ const
|
||||
"yield", #[[[end]]]
|
||||
"tkIntLit", "tkInt8Lit", "tkInt16Lit", "tkInt32Lit", "tkInt64Lit",
|
||||
"tkFloatLit", "tkFloat32Lit", "tkFloat64Lit", "tkStrLit", "tkRStrLit",
|
||||
"tkTripleStrLit", "tkCallRStrLit", "tkCallTripleStrLit", "tkCharLit", "(",
|
||||
"tkTripleStrLit", "tkGStrLit", "tkGTripleStrLit", "tkCharLit", "(",
|
||||
")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)", ",", ";", ":",
|
||||
"=", ".", "..", "^", "tkOpr", "tkComment", "`", "[new indentation]",
|
||||
"[same indentation]", "[dedentation]", "tkSpaces", "tkInfixOpr",
|
||||
@@ -587,10 +587,11 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
|
||||
tok.tokType = tkSymbol
|
||||
else:
|
||||
tok.tokType = TTokType(tok.ident.id + ord(tkSymbol))
|
||||
if buf[pos] == '\"':
|
||||
getString(L, tok, true)
|
||||
if tok.tokType == tkRStrLit: tok.tokType = tkCallRStrLit
|
||||
else: tok.tokType = tkCallTripleStrLit
|
||||
when false:
|
||||
if buf[pos] == '\"':
|
||||
getString(L, tok, true)
|
||||
if tok.tokType == tkRStrLit: tok.tokType = tkCallRStrLit
|
||||
else: tok.tokType = tkCallTripleStrLit
|
||||
|
||||
proc getOperator(L: var TLexer, tok: var TToken) =
|
||||
var pos = L.bufpos
|
||||
@@ -770,7 +771,13 @@ proc rawGetTok(L: var TLexer, tok: var TToken) =
|
||||
tok.tokType = tkAccent
|
||||
Inc(L.bufpos)
|
||||
of '\"':
|
||||
getString(L, tok, false)
|
||||
# check for extended raw string literal:
|
||||
var rawMode = L.bufpos > 0 and L.buf[L.bufpos-1] in SymChars
|
||||
getString(L, tok, rawMode)
|
||||
if rawMode:
|
||||
# tkRStrLit -> tkGStrLit
|
||||
# tkTripleStrLit -> tkGTripleStrLit
|
||||
inc(tok.tokType, 2)
|
||||
of '\'':
|
||||
tok.tokType = tkCharLit
|
||||
getCharacter(L, tok)
|
||||
|
||||
@@ -6,7 +6,8 @@ import
|
||||
|
||||
if "keyA = valueA" =~ re"\s*(\w+)\s*\=\s*(\w+)":
|
||||
write(stdout, "key: ", matches[0])
|
||||
elif "# comment!" =~ re"\s*(\#.*)":
|
||||
elif "# comment!" =~ re.re"\s*(\#.*)":
|
||||
# test re.re"" syntax
|
||||
echo("comment: ", matches[0])
|
||||
else:
|
||||
echo("Bug!")
|
||||
|
||||
1
todo.txt
1
todo.txt
@@ -1,7 +1,6 @@
|
||||
- thread support: threadvar on Windows seems broken;
|
||||
add --deadlock_prevention:on|off switch
|
||||
- built-in serialization
|
||||
- change how generalized raw string literals work
|
||||
|
||||
- we need a way to disable tests
|
||||
- deprecate ^ and make it available as operator
|
||||
|
||||
@@ -32,6 +32,8 @@ Changes affecting backwards compatibility
|
||||
instead.
|
||||
- Deprecated ``os.getApplicationDir``: Use ``os.getAppDir``
|
||||
instead.
|
||||
- Changed and documented how generalized string literals work: The syntax
|
||||
``module.re"abc"`` is now supported.
|
||||
|
||||
|
||||
Additions
|
||||
|
||||
Reference in New Issue
Block a user