changed handling of generalized string literals

This commit is contained in:
Araq
2011-02-07 00:11:11 +01:00
parent 77d045b3ac
commit 134f24f579
10 changed files with 76 additions and 39 deletions

View File

@@ -29,15 +29,18 @@ symbol ::= '`' (KEYWORD | IDENT | operator | '(' ')'
| IDENT
primaryPrefix ::= (prefixOperator | 'bind') optInd
primarySuffix ::= '.' optInd symbol
primarySuffix ::= '.' optInd symbol [generalizedLit]
| '(' optInd namedExprList optPar ')'
| '[' optInd [indexExpr (comma indexExpr)* [comma]] optPar ']'
| '^'
| pragma
primary ::= primaryPrefix* (symbol | constructor | castExpr | addrExpr)
primary ::= primaryPrefix* (symbol [generalizedLit] |
constructor | castExpr | addrExpr)
primarySuffix*
generalizedLit ::= GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
literal ::= INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT
| FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
| STR_LIT | RSTR_LIT | TRIPLESTR_LIT

View File

@@ -178,6 +178,8 @@ the exact spelling of an identifier.
String literals
---------------
Terminal symbol in the grammar: ``STR_LIT``.
`String literals`:idx: can be delimited by matching double quotes, and can
contain the following `escape sequences`:idx:\ :
@@ -205,12 +207,14 @@ contain the following `escape sequences`:idx:\ :
Strings in Nimrod may contain any 8-bit value, even embedded zeros. However
some operations may interpret the first binary zero as terminator.
some operations may interpret the first binary zero as a terminator.
Triple quoted string literals
-----------------------------
Terminal symbol in the grammar: ``TRIPLESTR_LIT``.
String literals can also be delimited by three double quotes
``"""`` ... ``"""``.
Literals in this form may run for several lines, may contain ``"`` and do not
@@ -230,10 +234,12 @@ Produces::
Raw string literals
-------------------
There are also `raw string literals` that are preceded with the letter ``r``
(or ``R``) and are delimited by matching double quotes (just like ordinary
string literals) and do not interpret the escape sequences. This is especially
convenient for regular expressions or Windows paths:
Terminal symbol in the grammar: ``RSTR_LIT``.
There are also `raw string literals`:idx: that are preceded with the
letter ``r`` (or ``R``) and are delimited by matching double quotes (just
like ordinary string literals) and do not interpret the escape sequences.
This is especially convenient for regular expressions or Windows paths:
.. code-block:: nimrod
@@ -250,12 +256,17 @@ Produces::
a"b
``r""""`` is not possible with this notation, because the three leading
quotes introduce a triple quoted string literal.
quotes introduce a triple quoted string literal. ``r"""`` is the same
as ``"""`` since triple quoted string literals do not interpret escape
sequences either.
Generalized raw string literals
-------------------------------
Terminal symbols in the grammar: ``GENERALIZED_STR_LIT``,
``GENERALIZED_TRIPLESTR_LIT``.
The construct ``identifier"string literal"`` (without whitespace between the
identifier and the opening quotation mark) is a
`generalized raw string literal`:idx:. It is a shortcut for the construct

View File

@@ -128,7 +128,7 @@ proc addImplicitMod(filename: string) =
gImplicitMods[length] = filename
proc getPrefixDir(): string =
result = SplitPath(getApplicationDir()).head
result = SplitPath(getAppDir()).head
proc shortenDir(dir: string): string =
# returns the interesting part of a dir

View File

@@ -163,12 +163,28 @@ proc parseAddr(p: var TParser): PNode =
addSon(result, parseExpr(p))
optPar(p)
eat(p, tkParRi)
proc parseGStrLit(p: var TParser, a: PNode): PNode =
  ## Handles an optional generalized string literal following `a`.
  ##
  ## If the current token is ``tkGStrLit`` or ``tkGTripleStrLit``, the result
  ## is a fresh ``nkCallStrLit`` node whose first son is `a` and whose second
  ## son is the literal (as ``nkRStrLit`` or ``nkTripleStrLit`` respectively);
  ## the literal token is then consumed. For any other token `a` is returned
  ## unchanged and no token is consumed.
  var litKind = nkRStrLit
  if p.tok.tokType == tkGTripleStrLit:
    litKind = nkTripleStrLit
  elif p.tok.tokType != tkGStrLit:
    # no generalized literal follows: nothing to wrap
    return a
  result = newNodeP(nkCallStrLit, p)
  addSon(result, a)
  addSon(result, newStrNodeP(litKind, p.tok.literal, p))
  getTok(p)
proc identOrLiteral(p: var TParser): PNode =
case p.tok.tokType
of tkSymbol:
result = newIdentNodeP(p.tok.ident, p)
getTok(p)
result = parseGStrLit(p, result)
of tkAccent:
result = accExpr(p) # literals
of tkIntLit:
@@ -212,16 +228,6 @@ proc identOrLiteral(p: var TParser): PNode =
of tkTripleStrLit:
result = newStrNodeP(nkTripleStrLit, p.tok.literal, p)
getTok(p)
of tkCallRStrLit:
result = newNodeP(nkCallStrLit, p)
addSon(result, newIdentNodeP(p.tok.ident, p))
addSon(result, newStrNodeP(nkRStrLit, p.tok.literal, p))
getTok(p)
of tkCallTripleStrLit:
result = newNodeP(nkCallStrLit, p)
addSon(result, newIdentNodeP(p.tok.ident, p))
addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p))
getTok(p)
of tkCharLit:
result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p)
getTok(p)
@@ -279,6 +285,7 @@ proc primary(p: var TParser): PNode =
getTok(p) # skip '.'
optInd(p, result)
addSon(result, parseSymbol(p))
result = parseGStrLit(p, result)
of tkHat:
a = result
result = newNodeP(nkDerefExpr, p)

View File

@@ -394,11 +394,27 @@ proc setBaseFlags(n: PNode, base: TNumericalBase) =
of base8: incl(n.flags, nfBase8)
of base16: incl(n.flags, nfBase16)
proc parseGStrLit(p: var TParser, a: PNode): PNode =
  ## Handles an optional generalized string literal following `a`.
  ##
  ## If the current token is ``tkGStrLit`` or ``tkGTripleStrLit``, the result
  ## is a fresh ``nkCallStrLit`` node whose first son is `a` and whose second
  ## son is the literal (as ``nkRStrLit`` or ``nkTripleStrLit`` respectively);
  ## the literal token is then consumed. For any other token `a` is returned
  ## unchanged and no token is consumed.
  var litKind = nkRStrLit
  if p.tok.tokType == tkGTripleStrLit:
    litKind = nkTripleStrLit
  elif p.tok.tokType != tkGStrLit:
    # no generalized literal follows: nothing to wrap
    return a
  result = newNodeP(nkCallStrLit, p)
  addSon(result, a)
  addSon(result, newStrNodeP(litKind, p.tok.literal, p))
  getTok(p)
proc identOrLiteral(p: var TParser): PNode =
case p.tok.tokType
of tkSymbol:
result = newIdentNodeP(p.tok.ident, p)
getTok(p)
result = parseGStrLit(p, result)
of tkAccent:
result = accExpr(p) # literals
of tkIntLit:
@@ -442,16 +458,6 @@ proc identOrLiteral(p: var TParser): PNode =
of tkTripleStrLit:
result = newStrNodeP(nkTripleStrLit, p.tok.literal, p)
getTok(p)
of tkCallRStrLit:
result = newNodeP(nkCallStrLit, p)
addSon(result, newIdentNodeP(p.tok.ident, p))
addSon(result, newStrNodeP(nkRStrLit, p.tok.literal, p))
getTok(p)
of tkCallTripleStrLit:
result = newNodeP(nkCallStrLit, p)
addSon(result, newIdentNodeP(p.tok.ident, p))
addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p))
getTok(p)
of tkCharLit:
result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p)
getTok(p)
@@ -509,6 +515,7 @@ proc primary(p: var TParser): PNode =
getTok(p) # skip '.'
optInd(p, result)
addSon(result, parseSymbol(p))
result = parseGStrLit(p, result)
of tkHat:
a = result
result = newNodeP(nkDerefExpr, p)

View File

@@ -684,7 +684,7 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
if n.sons[1].kind == nkRStrLit:
put(g, tkRStrLit, '\"' & replace(n[1].strVal, "\"", "\"\"") & '\"')
else:
gsub(g, n.sons[0])
gsub(g, n.sons[1])
of nkHiddenStdConv, nkHiddenSubConv, nkHiddenCallConv: gsub(g, n.sons[0])
of nkCast:
put(g, tkCast, "cast")

View File

@@ -58,7 +58,7 @@ type
tkYield, #[[[end]]]
tkIntLit, tkInt8Lit, tkInt16Lit, tkInt32Lit, tkInt64Lit, tkFloatLit,
tkFloat32Lit, tkFloat64Lit, tkStrLit, tkRStrLit, tkTripleStrLit,
tkCallRStrLit, tkCallTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe,
tkGStrLit, tkGTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe,
tkBracketRi, tkCurlyLe, tkCurlyRi,
tkBracketDotLe, tkBracketDotRi, # [. and .]
tkCurlyDotLe, tkCurlyDotRi, # {. and .}
@@ -91,7 +91,7 @@ const
"yield", #[[[end]]]
"tkIntLit", "tkInt8Lit", "tkInt16Lit", "tkInt32Lit", "tkInt64Lit",
"tkFloatLit", "tkFloat32Lit", "tkFloat64Lit", "tkStrLit", "tkRStrLit",
"tkTripleStrLit", "tkCallRStrLit", "tkCallTripleStrLit", "tkCharLit", "(",
"tkTripleStrLit", "tkGStrLit", "tkGTripleStrLit", "tkCharLit", "(",
")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)", ",", ";", ":",
"=", ".", "..", "^", "tkOpr", "tkComment", "`", "[new indentation]",
"[same indentation]", "[dedentation]", "tkSpaces", "tkInfixOpr",
@@ -587,10 +587,11 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
tok.tokType = tkSymbol
else:
tok.tokType = TTokType(tok.ident.id + ord(tkSymbol))
if buf[pos] == '\"':
getString(L, tok, true)
if tok.tokType == tkRStrLit: tok.tokType = tkCallRStrLit
else: tok.tokType = tkCallTripleStrLit
when false:
if buf[pos] == '\"':
getString(L, tok, true)
if tok.tokType == tkRStrLit: tok.tokType = tkCallRStrLit
else: tok.tokType = tkCallTripleStrLit
proc getOperator(L: var TLexer, tok: var TToken) =
var pos = L.bufpos
@@ -770,7 +771,13 @@ proc rawGetTok(L: var TLexer, tok: var TToken) =
tok.tokType = tkAccent
Inc(L.bufpos)
of '\"':
getString(L, tok, false)
# check for extended raw string literal:
var rawMode = L.bufpos > 0 and L.buf[L.bufpos-1] in SymChars
getString(L, tok, rawMode)
if rawMode:
# tkRStrLit -> tkGStrLit
# tkTripleStrLit -> tkGTripleStrLit
inc(tok.tokType, 2)
of '\'':
tok.tokType = tkCharLit
getCharacter(L, tok)

View File

@@ -6,7 +6,8 @@ import
if "keyA = valueA" =~ re"\s*(\w+)\s*\=\s*(\w+)":
write(stdout, "key: ", matches[0])
elif "# comment!" =~ re"\s*(\#.*)":
elif "# comment!" =~ re.re"\s*(\#.*)":
# test re.re"" syntax
echo("comment: ", matches[0])
else:
echo("Bug!")

View File

@@ -1,7 +1,6 @@
- thread support: threadvar on Windows seems broken;
add --deadlock_prevention:on|off switch
- built-in serialization
- change how generalized raw string literals work
- we need a way to disable tests
- deprecate ^ and make it available as operator

View File

@@ -32,6 +32,8 @@ Changes affecting backwards compatibility
instead.
- Deprecated ``os.getApplicationDir``: Use ``os.getAppDir``
instead.
- Changed and documented how generalized string literals work: The syntax
``module.re"abc"`` is now supported.
Additions