newStringOfCap implemented and used to optimize some procs

2026-02-13 23:03:36 +00:00 · 2011-05-08 17:38:34 +02:00
parent 1893f4101a
commit d2e2d71d05
10 changed files with 53 additions and 21 deletions
--- a/compiler/ast.nim
+++ b/compiler/ast.nim
@@ -337,7 +337,9 @@ type
    mFields, mFieldPairs,
    mAppendStrCh, mAppendStrStr, mAppendSeqElem, 
    mInRange, mInSet, mRepr, mExit, mSetLengthStr, mSetLengthSeq, mAssert, 
-    mSwap, mIsNil, mArrToSeq, mCopyStr, mCopyStrLast, mNewString, mReset, 
+    mSwap, mIsNil, mArrToSeq, mCopyStr, mCopyStrLast, 
+    mNewString, mNewStringOfCap,
+    mReset,
    mArray, mOpenArray, mRange, mSet, mSeq, 
    mOrdinal, mInt, mInt8, mInt16, mInt32, 
    mInt64, mFloat, mFloat32, mFloat64, mBool, mChar, mString, mCstring, 
--- a/compiler/ccgexprs.nim
+++ b/compiler/ccgexprs.nim
@@ -1457,7 +1457,8 @@ proc genMagicExpr(p: BProc, e: PNode, d: var TLoc, op: TMagic) =
  of mIncl, mExcl, mCard, mLtSet, mLeSet, mEqSet, mMulSet, mPlusSet, mMinusSet,
     mInSet:
    genSetOp(p, e, d, op)
-  of mNewString, mCopyStr, mCopyStrLast, mExit: genCall(p, e, d)
+  of mNewString, mNewStringOfCap, mCopyStr, mCopyStrLast, mExit: 
+    genCall(p, e, d)
  of mReset: genReset(p, e)
  of mEcho: genEcho(p, e)
  of mArrToSeq: genArrToSeq(p, e, d)
--- a/compiler/evals.nim
+++ b/compiler/evals.nim
@@ -990,7 +990,13 @@ proc evalMagicOrCall(c: PEvalContext, n: PNode): PNode =
    var a = result
    result = newNodeIT(nkStrLit, n.info, n.typ)
    result.strVal = newString(int(getOrdValue(a)))
-  else: 
+  of mNewStringOfCap:
+    result = evalAux(c, n.sons[1], {})
+    if isSpecial(result): return 
+    var a = result
+    result = newNodeIT(nkStrLit, n.info, n.typ)
+    result.strVal = newString(0)
+  else:
    result = evalAux(c, n.sons[1], {})
    if isSpecial(result): return 
    var a = result
--- a/compiler/semfold.nim
+++ b/compiler/semfold.nim
@@ -1,7 +1,7 @@
 #
 #
 #           The Nimrod Compiler
-#        (c) Copyright 2010 Andreas Rumpf
+#        (c) Copyright 2011 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -205,7 +205,8 @@ proc evalOp(m: TMagic, n, a, b, c: PNode): PNode =
  of mCompileOptionArg:
    result = newIntNodeT(Ord(
      testCompileOptionArg(getStr(a), getStr(b), n.info)), n)
-  of mNewString, mExit, mInc, ast.mDec, mEcho, mAssert, mSwap, mAppendStrCh, 
+  of mNewString, mNewStringOfCap, 
+     mExit, mInc, ast.mDec, mEcho, mAssert, mSwap, mAppendStrCh, 
     mAppendStrStr, mAppendSeqElem, mSetLengthStr, mSetLengthSeq, 
     mNLen..mNError, mEqRef: 
    nil
--- a/doc/apis.txt
+++ b/doc/apis.txt
@@ -32,6 +32,7 @@ get                     get, ``[]``    consider overloading ``[]`` for get;
                                       prefix: ``len`` instead of ``getLen``
 length                  len            also used for *number of elements*
 size                    size, len      size should refer to a byte size
+capacity                cap
 memory                  mem            implies a low-level operation
 items                   items          default iterator over a collection
 pairs                   pairs          iterator over (key, value) pairs
--- a/lib/pure/cgi.nim
+++ b/lib/pure/cgi.nim
@@ -36,7 +36,7 @@ proc URLencode*(s: string): string =
  ## ``{'A'..'Z', 'a'..'z', '0'..'9', '_'}`` are carried over to the result,
  ## a space is converted to ``'+'`` and every other character is encoded as
  ## ``'%xx'`` where ``xx`` denotes its hexadecimal value. 
-  result = ""
+  result = newStringOfCap(s.len + s.len shr 2) # assume 12% non-alnum-chars
  for i in 0..s.len-1:
    case s[i]
    of 'a'..'z', 'A'..'Z', '0'..'9', '_': add(result, s[i])
@@ -57,8 +57,9 @@ proc URLdecode*(s: string): string =
  ## is converted to a space, ``'%xx'`` (where ``xx`` denotes a hexadecimal
  ## value) is converted to the character with ordinal number ``xx``, and  
  ## and every other character is carried over. 
-  result = ""
+  result = newString(s.len)
  var i = 0
+  var j = 0
  while i < s.len:
    case s[i]
    of '%': 
@@ -66,10 +67,12 @@ proc URLdecode*(s: string): string =
      handleHexChar(s[i+1], x)
      handleHexChar(s[i+2], x)
      inc(i, 2)
-      add(result, chr(x))
-    of '+': add(result, ' ')
-    else: add(result, s[i])
+      result[j] = chr(x)
+    of '+': result[j] = ' '
+    else: result[j] = s[i]
    inc(i)
+    inc(j)
+  setLen(result, j)

 proc addXmlChar(dest: var string, c: Char) {.inline.} = 
  case c
@@ -86,7 +89,7 @@ proc XMLencode*(s: string): string =
  ## * ``>`` is replaced by ``&gt;``
  ## * ``&`` is replaced by ``&amp;``
  ## * every other character is carried over.
-  result = ""
+  result = newStringOfCap(s.len + s.len shr 2)
  for i in 0..len(s)-1: addXmlChar(result, s[i])

 type
@@ -367,4 +370,8 @@ proc existsCookie*(name: string): bool =
  if gcookies == nil: gcookies = parseCookies(getHttpCookie())
  result = hasKey(gcookies, name)

+when isMainModule:
+  const test1 = "abc\L+def xyz"
+  assert UrlEncode(test1) == "abc%0A%2Bdef+xyz"
+  assert UrlDecode(UrlEncode(test1)) == test1

--- a/lib/pure/json.nim
+++ b/lib/pure/json.nim
@@ -620,7 +620,8 @@ proc nl(s: var string, ml: bool) =

 proc escapeJson*(s: string): string = 
  ## Converts a string `s` to its JSON representation.
-  result = "\""
+  result = newStringOfCap(s.len + s.len shr 3)
+  result.add("\"")
  for x in runes(s):
    var r = int(x)
    if r >= 32 and r <= 127:
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -89,12 +89,16 @@ proc normalize*(s: string): string {.noSideEffect, procvar,
  rtl, extern: "nsuNormalize".} =
  ## Normalizes the string `s`. That means to convert it to lower case and
  ## remove any '_'. This is needed for Nimrod identifiers for example.
-  result = ""
+  result = newString(s.len)
+  var j = 0
  for i in 0..len(s) - 1:
    if s[i] in {'A'..'Z'}:
-      add result, Chr(Ord(s[i]) + (Ord('a') - Ord('A')))
+      result[j] = Chr(Ord(s[i]) + (Ord('a') - Ord('A')))
+      inc j
    elif s[i] != '_':
-      add result, s[i]
+      result[j] = s[i]
+      inc j
+  if j != s.len: setLen(result, j)

 proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
  rtl, extern: "nsuCmpIgnoreCase", procvar.} =
@@ -226,13 +230,14 @@ proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect,
  ##
  ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is
  ## raised if an ill-formed format string has been passed to the `%` operator.
-  result = ""
+  result = newStringOfCap(formatstr.len + a.len shl 4)
  addf(result, formatstr, a)

 proc `%` *(formatstr, a: string): string {.noSideEffect, 
  rtl, extern: "nsuFormatSingleElem".} =
  ## This is the same as ``formatstr % [a]``.
-  return formatstr % [a]
+  result = newStringOfCap(formatstr.len + a.len)
+  addf(result, formatstr, [a])

 proc strip*(s: string, leading = true, trailing = true): string {.noSideEffect,
  rtl, extern: "nsuStrip".} =
@@ -510,7 +515,7 @@ proc wordWrap*(s: string, maxLineWidth = 80,
               newLine = "\n"): string {.
               noSideEffect, rtl, extern: "nsuWordWrap".} = 
  ## word wraps `s`.
-  result = ""
+  result = newStringOfCap(s.len + s.len shr 6)
  var SpaceLeft = maxLineWidth
  for word, isSep in tokenize(s, seps):
    if len(word) > SpaceLeft:
@@ -804,7 +809,8 @@ proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
  ## The procedure has been designed so that its output is usable for many
  ## different common syntaxes. The resulting string is prefixed with
  ## `prefix` and suffixed with `suffix`. Both may be empty strings.
-  result = prefix
+  result = newStringOfCap(s.len + s.len shr 2)
+  result.add(prefix)
  for c in items(s):
    case c
    of '\0'..'\31', '\128'..'\255':
--- a/lib/system.nim
+++ b/lib/system.nim
@@ -685,7 +685,13 @@ proc newString*(len: int): string {.
  ## content. One needs to fill the string character after character
  ## with the index operator ``s[i]``. This procedure exists only for
  ## optimization purposes; the same effect can be achieved with the
-  ## ``&`` operator.
+  ## ``&`` operator or with ``add``.
+
+proc newStringOfCap*(cap: int): string {.
+  magic: "NewStringOfCap", importc: "rawNewString", noSideEffect.}
+  ## returns a new string of length ``0`` but with capacity `cap`. This
+  ## procedure exists only for optimization purposes; the same effect can 
+  ## be achieved with the ``&`` operator or with ``add``.

 proc `&` * (x: string, y: char): string {.
  magic: "ConStrStr", noSideEffect, merge.}
--- a/web/news.txt
+++ b/web/news.txt
@@ -74,10 +74,11 @@ Additions
 - Added the ``linearScanEnd``, ``unroll``, ``shallow`` pragmas.
 - Added ``system.reset`` and a version of ``system.open`` that 
  returns a ``TFile`` and raises an exception in case of an error.
- The compiler now might use a hashing for string case statements depending
+- The compiler now might use hashing for string case statements depending
  on the number of string literals in the case statement.
 - Added a wrapper for ``redis``.
 - The compiler now supports array, sequence and string slicing.
+- Added ``system.newStringOfCap``.


 2010-10-20 Version 0.8.10 released