diff --git a/doc/grammar.txt b/doc/grammar.txt index a648913a09..c84caec49c 100755 --- a/doc/grammar.txt +++ b/doc/grammar.txt @@ -29,15 +29,18 @@ symbol ::= '`' (KEYWORD | IDENT | operator | '(' ')' | IDENT primaryPrefix ::= (prefixOperator | 'bind') optInd -primarySuffix ::= '.' optInd symbol +primarySuffix ::= '.' optInd symbol [generalizedLit] | '(' optInd namedExprList optPar ')' | '[' optInd [indexExpr (comma indexExpr)* [comma]] optPar ']' | '^' | pragma -primary ::= primaryPrefix* (symbol | constructor | castExpr | addrExpr) +primary ::= primaryPrefix* (symbol [generalizedLit] | + constructor | castExpr | addrExpr) primarySuffix* +generalizedLit ::= GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT + literal ::= INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT | FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT | STR_LIT | RSTR_LIT | TRIPLESTR_LIT diff --git a/doc/manual.txt b/doc/manual.txt index 7c0619ba7b..49311fb43c 100755 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -178,6 +178,8 @@ the exact spelling of an identifier. String literals --------------- +Terminal symbol in the grammar: ``STR_LIT``. + `String literals`:idx: can be delimited by matching double quotes, and can contain the following `escape sequences`:idx:\ : @@ -205,12 +207,14 @@ contain the following `escape sequences`:idx:\ : Strings in Nimrod may contain any 8-bit value, even embedded zeros. However -some operations may interpret the first binary zero as terminator. +some operations may interpret the first binary zero as a terminator. Triple quoted string literals ----------------------------- +Terminal symbol in the grammar: ``TRIPLESTR_LIT``. + String literals can also be delimited by three double quotes ``"""`` ... ``"""``. Literals in this form may run for several lines, may contain ``"`` and do not @@ -230,10 +234,12 @@ Produces:: Raw string literals ------------------- -There are also `raw string literals` that are preceded with the letter ``r`` -(or ``R``) and are delimited by matching double quotes (just like ordinary -string literals) and do not interpret the escape sequences. This is especially -convenient for regular expressions or Windows paths: +Terminal symbol in the grammar: ``RSTR_LIT``. + +There are also `raw string literals`:idx: that are preceded with the +letter ``r`` (or ``R``) and are delimited by matching double quotes (just +like ordinary string literals) and do not interpret the escape sequences. +This is especially convenient for regular expressions or Windows paths: .. code-block:: nimrod @@ -250,12 +256,17 @@ Produces:: a"b ``r""""`` is not possible with this notation, because the three leading -quotes introduce a triple quoted string literal. +quotes introduce a triple quoted string literal. ``r"""`` is the same +as ``"""`` since triple quoted string literals do not interpret escape +sequences either. Generalized raw string literals ------------------------------- +Terminal symbols in the grammar: ``GENERALIZED_STR_LIT``, +``GENERALIZED_TRIPLESTR_LIT``. + The construct ``identifier"string literal"`` (without whitespace between the identifier and the opening quotation mark) is a `generalized raw string literal`:idx:. It is a shortcut for the construct diff --git a/rod/options.nim b/rod/options.nim index 1503770c35..16ff536bd8 100755 --- a/rod/options.nim +++ b/rod/options.nim @@ -128,7 +128,7 @@ proc addImplicitMod(filename: string) = gImplicitMods[length] = filename proc getPrefixDir(): string = - result = SplitPath(getApplicationDir()).head + result = SplitPath(getAppDir()).head proc shortenDir(dir: string): string = # returns the interesting part of a dir diff --git a/rod/pbraces.nim b/rod/pbraces.nim index 4a5f85b85f..dc1ddb2927 100755 --- a/rod/pbraces.nim +++ b/rod/pbraces.nim @@ -163,12 +163,28 @@ proc parseAddr(p: var TParser): PNode = addSon(result, parseExpr(p)) optPar(p) eat(p, tkParRi) + +proc parseGStrLit(p: var TParser, a: PNode): PNode = + case p.tok.tokType + of tkGStrLit: + result = newNodeP(nkCallStrLit, p) + addSon(result, a) + addSon(result, newStrNodeP(nkRStrLit, p.tok.literal, p)) + getTok(p) + of tkGTripleStrLit: + result = newNodeP(nkCallStrLit, p) + addSon(result, a) + addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p)) + getTok(p) + else: + result = a proc identOrLiteral(p: var TParser): PNode = case p.tok.tokType of tkSymbol: result = newIdentNodeP(p.tok.ident, p) getTok(p) + result = parseGStrLit(p, result) of tkAccent: result = accExpr(p) # literals of tkIntLit: @@ -212,16 +228,6 @@ proc identOrLiteral(p: var TParser): PNode = of tkTripleStrLit: result = newStrNodeP(nkTripleStrLit, p.tok.literal, p) getTok(p) - of tkCallRStrLit: - result = newNodeP(nkCallStrLit, p) - addSon(result, newIdentNodeP(p.tok.ident, p)) - addSon(result, newStrNodeP(nkRStrLit, p.tok.literal, p)) - getTok(p) - of tkCallTripleStrLit: - result = newNodeP(nkCallStrLit, p) - addSon(result, newIdentNodeP(p.tok.ident, p)) - addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p)) - getTok(p) of tkCharLit: result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p) getTok(p) @@ -279,6 +285,7 @@ proc primary(p: var TParser): PNode = getTok(p) # skip '.' optInd(p, result) addSon(result, parseSymbol(p)) + result = parseGStrLit(p, result) of tkHat: a = result result = newNodeP(nkDerefExpr, p) diff --git a/rod/pnimsyn.nim b/rod/pnimsyn.nim index fe80fe0c45..113f47d34e 100755 --- a/rod/pnimsyn.nim +++ b/rod/pnimsyn.nim @@ -394,11 +394,27 @@ proc setBaseFlags(n: PNode, base: TNumericalBase) = of base8: incl(n.flags, nfBase8) of base16: incl(n.flags, nfBase16) +proc parseGStrLit(p: var TParser, a: PNode): PNode = + case p.tok.tokType + of tkGStrLit: + result = newNodeP(nkCallStrLit, p) + addSon(result, a) + addSon(result, newStrNodeP(nkRStrLit, p.tok.literal, p)) + getTok(p) + of tkGTripleStrLit: + result = newNodeP(nkCallStrLit, p) + addSon(result, a) + addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p)) + getTok(p) + else: + result = a + proc identOrLiteral(p: var TParser): PNode = case p.tok.tokType of tkSymbol: result = newIdentNodeP(p.tok.ident, p) getTok(p) + result = parseGStrLit(p, result) of tkAccent: result = accExpr(p) # literals of tkIntLit: @@ -442,16 +458,6 @@ proc identOrLiteral(p: var TParser): PNode = of tkTripleStrLit: result = newStrNodeP(nkTripleStrLit, p.tok.literal, p) getTok(p) - of tkCallRStrLit: - result = newNodeP(nkCallStrLit, p) - addSon(result, newIdentNodeP(p.tok.ident, p)) - addSon(result, newStrNodeP(nkRStrLit, p.tok.literal, p)) - getTok(p) - of tkCallTripleStrLit: - result = newNodeP(nkCallStrLit, p) - addSon(result, newIdentNodeP(p.tok.ident, p)) - addSon(result, newStrNodeP(nkTripleStrLit, p.tok.literal, p)) - getTok(p) of tkCharLit: result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p) getTok(p) @@ -509,6 +515,7 @@ proc primary(p: var TParser): PNode = getTok(p) # skip '.' optInd(p, result) addSon(result, parseSymbol(p)) + result = parseGStrLit(p, result) of tkHat: a = result result = newNodeP(nkDerefExpr, p) diff --git a/rod/rnimsyn.nim b/rod/rnimsyn.nim index d0c7dc9e3b..9424c45195 100755 --- a/rod/rnimsyn.nim +++ b/rod/rnimsyn.nim @@ -684,7 +684,7 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) = if n.sons[1].kind == nkRStrLit: put(g, tkRStrLit, '\"' & replace(n[1].strVal, "\"", "\"\"") & '\"') else: - gsub(g, n.sons[0]) + gsub(g, n.sons[1]) of nkHiddenStdConv, nkHiddenSubConv, nkHiddenCallConv: gsub(g, n.sons[0]) of nkCast: put(g, tkCast, "cast") diff --git a/rod/scanner.nim b/rod/scanner.nim index 348c5c73d5..f40b8769d6 100755 --- a/rod/scanner.nim +++ b/rod/scanner.nim @@ -58,7 +58,7 @@ type tkYield, #[[[end]]] tkIntLit, tkInt8Lit, tkInt16Lit, tkInt32Lit, tkInt64Lit, tkFloatLit, tkFloat32Lit, tkFloat64Lit, tkStrLit, tkRStrLit, tkTripleStrLit, - tkCallRStrLit, tkCallTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe, + tkGStrLit, tkGTripleStrLit, tkCharLit, tkParLe, tkParRi, tkBracketLe, tkBracketRi, tkCurlyLe, tkCurlyRi, tkBracketDotLe, tkBracketDotRi, # [. and .] tkCurlyDotLe, tkCurlyDotRi, # {. and .} @@ -91,7 +91,7 @@ const "yield", #[[[end]]] "tkIntLit", "tkInt8Lit", "tkInt16Lit", "tkInt32Lit", "tkInt64Lit", "tkFloatLit", "tkFloat32Lit", "tkFloat64Lit", "tkStrLit", "tkRStrLit", - "tkTripleStrLit", "tkCallRStrLit", "tkCallTripleStrLit", "tkCharLit", "(", + "tkTripleStrLit", "tkGStrLit", "tkGTripleStrLit", "tkCharLit", "(", ")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)", ",", ";", ":", "=", ".", "..", "^", "tkOpr", "tkComment", "`", "[new indentation]", "[same indentation]", "[dedentation]", "tkSpaces", "tkInfixOpr", @@ -587,10 +587,11 @@ proc getSymbol(L: var TLexer, tok: var TToken) = tok.tokType = tkSymbol else: tok.tokType = TTokType(tok.ident.id + ord(tkSymbol)) - if buf[pos] == '\"': - getString(L, tok, true) - if tok.tokType == tkRStrLit: tok.tokType = tkCallRStrLit - else: tok.tokType = tkCallTripleStrLit + when false: + if buf[pos] == '\"': + getString(L, tok, true) + if tok.tokType == tkRStrLit: tok.tokType = tkCallRStrLit + else: tok.tokType = tkCallTripleStrLit proc getOperator(L: var TLexer, tok: var TToken) = var pos = L.bufpos @@ -770,7 +771,13 @@ proc rawGetTok(L: var TLexer, tok: var TToken) = tok.tokType = tkAccent Inc(L.bufpos) of '\"': - getString(L, tok, false) + # check for extended raw string literal: + var rawMode = L.bufpos > 0 and L.buf[L.bufpos-1] in SymChars + getString(L, tok, rawMode) + if rawMode: + # tkRStrLit -> tkGStrLit + # tkTripleStrLit -> tkGTripleStrLit + inc(tok.tokType, 2) of '\'': tok.tokType = tkCharLit getCharacter(L, tok) diff --git a/tests/accept/run/tregex.nim b/tests/accept/run/tregex.nim index 43dc8f99c2..882f981321 100755 --- a/tests/accept/run/tregex.nim +++ b/tests/accept/run/tregex.nim @@ -6,7 +6,8 @@ import if "keyA = valueA" =~ re"\s*(\w+)\s*\=\s*(\w+)": write(stdout, "key: ", matches[0]) -elif "# comment!" =~ re"\s*(\#.*)": +elif "# comment!" =~ re.re"\s*(\#.*)": + # test re.re"" syntax echo("comment: ", matches[0]) else: echo("Bug!") diff --git a/todo.txt b/todo.txt index d051a3d810..73f598fd95 100755 --- a/todo.txt +++ b/todo.txt @@ -1,7 +1,6 @@ - thread support: threadvar on Windows seems broken; add --deadlock_prevention:on|off switch - built-in serialization -- change how generalized raw string literals work - we need a way to disable tests - deprecate ^ and make it available as operator diff --git a/web/news.txt b/web/news.txt index 7f013732ae..1548fdc0f5 100755 --- a/web/news.txt +++ b/web/news.txt @@ -32,6 +32,8 @@ Changes affecting backwards compatibility instead. - Deprecated ``os.getApplicationDir``: Use ``os.getAppDir`` instead. +- Changed and documented how generalized string literals work: The syntax + ``module.re"abc"`` is now supported. Additions