added support for advanced substitution expressions

2026-02-23 11:26:52 +00:00 · 2011-12-27 19:22:46 +01:00
parent 76f91b90e2
commit b336bf4039
8 changed files with 566 additions and 113 deletions
--- a/doc/lib.txt
+++ b/doc/lib.txt
@@ -108,6 +108,9 @@ String handling
 * `matchers <matchers.html>`_
  This module contains various string matchers for email addresses, etc.

+* `subexes <subexes.html>`_
+  This module implements advanced string substitution operations.
+

 Generic Operating System Services
 ---------------------------------
--- a/doc/subexes.txt
+++ b/doc/subexes.txt
@@ -0,0 +1,58 @@
+================================
+Substitution Expressions (subex)
+================================
+
+A *subex* (*Substitution Expression*) represents an advanced string
+substitution. In contrast to a `regex`:idx: which deals with string analysis, a
+*subex* deals with string synthesis.
+
+Thanks to its conditional construct ``$[0|1|2|else]`` it supports 
+`internationalization`:idx: of format string literals quite well.
+
+
+=====================   =====================================================
+Notation                meaning
+=====================   =====================================================
+``$#``                  use first or next argument
+``$name``               use named argument
+``$1``                  use first argument
+``$-1``                 use last argument
+``${1..3}``             use arguments 1 to 3
+``${..}``               use all arguments
+``${#..}``              use all remaining arguments
+``${..-2}``             use all arguments except the last argument
+``${$1}``               use argument X where ``X = parseInt(arg[1])``
+``${$1..$2}``           use arguments X to Y where ``X = parseInt(arg[1])``
+                        and ``Y = parseInt(arg[2])``
+``$','{1..3}``          use arguments 1 to 3 and join them with ','
+``$','80c'\n'{..}``     use all arguments, join them with ','. Insert '\n'
+                        before the resulting string exceeds 80 chars.
+``$','8i'\n'{..}``      use all arguments, join them with ','. Insert '\n'
+                        after every 8th item.
+``$' '~{1..3}``         use arguments 1 to 3 with a leading space if the
+                        concatenation of ``1..3`` is not the empty string
+``$[zero|one|def]1``    use ``X = parseInt(arg[1])`` to determine which
+                        branch to use. If ``X == 0`` the 'zero' branch is
+                        selected, if ``X == 1`` the 'one' branch is
+                        selected, etc. Otherwise the 'def' branch is 
+                        selected. ``$x`` is interpreted in branches too.
+                        If a branch needs to contain ``|``, ``]`` put
+                        them in single quotes. To produce a verbatim single
+                        quote, use ``''``.
+=====================   =====================================================
+
+Examples
+========
+
+.. code-block:: nimrod
+
+  subex"$1($', '{2..})" % ["f", "a", "b", "c"] == "f(a, b, c)"
+  
+  subex"$1 $[files|file|files]{1} copied" % ["1"] == "1 file copied"
+  
+  subex"$['''|'|''''|']']#" % "0" == "'|"
+  
+  subex("type\n  TEnum = enum\n    $', '40c'\n    '{..}") % [
+    "fieldNameA", "fieldNameB", "fieldNameC", "fieldNameD"]
+    
+
--- a/lib/pure/strutils.nim
+++ b/lib/pure/strutils.nim
@@ -140,105 +140,6 @@ proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect,

 {.pop.}

-proc findNormalized(x: string, inArray: openarray[string]): int =
-  var i = 0
-  while i < high(inArray):
-    if cmpIgnoreStyle(x, inArray[i]) == 0: return i
-    inc(i, 2) # incrementing by 1 would probably lead to a
-              # security hole...
-  return -1
-
-proc addf*(s: var string, formatstr: string, a: openarray[string]) {.
-  noSideEffect, rtl, extern: "nsuAddf".} =
-  ## The same as ``add(s, formatstr % a)``, but more efficient.
-  const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
-  var i = 0
-  var num = 0
-  while i < len(formatstr):
-    if formatstr[i] == '$':
-      case formatstr[i+1] # again we use the fact that strings
-                          # are zero-terminated here
-      of '#':
-        add s, a[num]
-        inc i, 2
-        inc num
-      of '$':
-        add s, '$'
-        inc(i, 2)
-      of '1'..'9':
-        var j = 0
-        inc(i) # skip $
-        while formatstr[i] in Digits:
-          j = j * 10 + ord(formatstr[i]) - ord('0')
-          inc(i)
-        add s, a[j - 1]
-      of '{':
-        var j = i+1
-        while formatstr[j] notin {'\0', '}'}: inc(j)
-        var x = findNormalized(substr(formatstr, i+2, j-1), a)
-        if x >= 0 and x < high(a): add s, a[x+1]
-        else: raise newException(EInvalidValue, "invalid format string")
-        i = j+1
-      of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
-        var j = i+1
-        while formatstr[j] in PatternChars: inc(j)
-        var x = findNormalized(substr(formatstr, i+1, j-1), a)
-        if x >= 0 and x < high(a): add s, a[x+1]
-        else: raise newException(EInvalidValue, "invalid format string")
-        i = j
-      else: raise newException(EInvalidValue, "invalid format string")
-    else:
-      add s, formatstr[i]
-      inc(i)
-
-proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect,
-  rtl, extern: "nsuFormatOpenArray".} =
-  ## The `substitution`:idx: operator performs string substitutions in
-  ## `formatstr` and returns a modified `formatstr`. This is often called
-  ## `string interpolation`:idx:.
-  ##
-  ## This is best explained by an example:
-  ##
-  ## .. code-block:: nimrod
-  ##   "$1 eats $2." % ["The cat", "fish"]
-  ##
-  ## Results in:
-  ##
-  ## .. code-block:: nimrod
-  ##   "The cat eats fish."
-  ##
-  ## The substitution variables (the thing after the ``$``) are enumerated
-  ## from 1 to ``a.len``.
-  ## To produce a verbatim ``$``, use ``$$``.
-  ## The notation ``$#`` can be used to refer to the next substitution variable:
-  ##
-  ## .. code-block:: nimrod
-  ##   "$# eats $#." % ["The cat", "fish"]
-  ##
-  ## Substitution variables can also be words (that is
-  ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even
-  ## indices are keys and with odd indices are the corresponding values.
-  ## An example:
-  ##
-  ## .. code-block:: nimrod
-  ##   "$animal eats $food." % ["animal", "The cat", "food", "fish"]
-  ##
-  ## Results in:
-  ##
-  ## .. code-block:: nimrod
-  ##   "The cat eats fish."
-  ##
-  ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is
-  ## raised if an ill-formed format string has been passed to the `%` operator.
-  result = newStringOfCap(formatstr.len + a.len shl 4)
-  addf(result, formatstr, a)
-
-proc `%` *(formatstr, a: string): string {.noSideEffect,
-  rtl, extern: "nsuFormatSingleElem".} =
-  ## This is the same as ``formatstr % [a]``.
-  result = newStringOfCap(formatstr.len + a.len)
-  addf(result, formatstr, [a])
-
 proc strip*(s: string, leading = true, trailing = true): string {.noSideEffect,
  rtl, extern: "nsuStrip".} =
  ## Strips whitespace from `s` and returns the resulting string.
@@ -467,16 +368,16 @@ proc ParseHexInt*(s: string): int {.noSideEffect, procvar,
      inc(i)
    of '\0': break
    else: raise newException(EInvalidValue, "invalid integer: " & s)
-
-proc parseBool*(s: string): bool =
-  ## Parses a value into a `bool`. If ``s`` is one of the following values:
-  ## ``y, yes, true, 1, on``, then returns `true`. If ``s`` is one of the
-  ## following values: ``n, no, false, 0, off``, then returns `false`.
-  ## If ``s`` is something else a ``EInvalidValue`` exception is raised.
-  case normalize(s)
-  of "y", "yes", "true", "1", "on": result = true
-  of "n", "no", "false", "0", "off": result = false
-  else: raise newException(EInvalidValue, "cannot interpret as a bool: " & s)
+
+proc parseBool*(s: string): bool =
+  ## Parses a value into a `bool`. If ``s`` is one of the following values:
+  ## ``y, yes, true, 1, on``, then returns `true`. If ``s`` is one of the
+  ## following values: ``n, no, false, 0, off``, then returns `false`.
+  ## If ``s`` is something else a ``EInvalidValue`` exception is raised.
+  case normalize(s)
+  of "y", "yes", "true", "1", "on": result = true
+  of "n", "no", "false", "0", "off": result = false
+  else: raise newException(EInvalidValue, "cannot interpret as a bool: " & s)

 proc repeatChar*(count: int, c: Char = ' '): string {.noSideEffect,
  rtl, extern: "nsuRepeatChar".} =
@@ -921,7 +822,7 @@ proc editDistance*(a, b: string): int {.noSideEffect,
  inc(len2)
  var half = len1 shr 1
  # initalize first row:
-  #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2 * sizeof(int)))
+  #var row = cast[ptr array[0..high(int) div 8, int]](alloc(len2*sizeof(int)))
  var row: seq[int]
  newSeq(row, len2)
  var e = s + len2 - 1 # end marker
@@ -1033,7 +934,7 @@ proc formatSize*(bytes: biggestInt, decimalSep = '.'): string =
  ##
  ## .. code-block:: nimrod
  ##
-  ##    formatSize(1'i64 shl 31 + 300'i64) == "4GB"
+  ##    formatSize(1'i64 shl 31 + 300'i64) == "2.204GB"
  ##    formatSize(4096) == "4KB"
  ##
  template frmt(a, b, c: expr): expr =
@@ -1051,6 +952,112 @@ proc formatSize*(bytes: biggestInt, decimalSep = '.'): string =
  else:
    result = insertSep($bytes) & "B"

+proc findNormalized(x: string, inArray: openarray[string]): int =
+  var i = 0 # only the even indices are compared with `x`
+  while i < high(inArray): # `<`: the very last entry is never tested
+    if cmpIgnoreStyle(x, inArray[i]) == 0: return i
+    inc(i, 2) # incrementing by 1 would probably lead to a
+              # security hole...
+  return -1 # nothing is equal to `x`
+
+proc addf*(s: var string, formatstr: string, a: openarray[string]) {.
+  noSideEffect, rtl, extern: "nsuAddf".} =
+  ## The same as ``add(s, formatstr % a)``, but more efficient.
+  ##
+  ## `EInvalidValue` is raised if `formatstr` is ill-formed or if it refers
+  ## to an argument that does not exist in `a`.
+  const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
+  var i = 0
+  var num = 0 # index of the argument that the next ``$#`` refers to
+  while i < len(formatstr):
+    if formatstr[i] == '$':
+      case formatstr[i+1] # again we use the fact that strings
+                          # are zero-terminated here
+      of '#':
+        # more ``$#`` than arguments must not index outside of `a`:
+        if num > high(a):
+          raise newException(EInvalidValue, "invalid format string")
+        add s, a[num]
+        inc i, 2
+        inc num
+      of '$':
+        add s, '$'
+        inc(i, 2)
+      of '1'..'9', '-':
+        var j = 0
+        inc(i) # skip $
+        var negative = formatstr[i] == '-'
+        if negative: inc i
+        while formatstr[i] in Digits:
+          j = j * 10 + ord(formatstr[i]) - ord('0')
+          inc(i)
+        # ``$1`` is the first argument, ``$-1`` is the last one:
+        let idx = if not negative: j - 1 else: a.len - j
+        # a lone ``$-`` (j == 0) or a number that is too large would
+        # otherwise index outside of `a`:
+        if idx < 0 or idx > high(a):
+          raise newException(EInvalidValue, "invalid format string")
+        add s, a[idx]
+      of '{':
+        var j = i+1
+        while formatstr[j] notin {'\0', '}'}: inc(j)
+        var x = findNormalized(substr(formatstr, i+2, j-1), a)
+        # `x` is the index of the key, the value is stored right after it:
+        if x >= 0 and x < high(a): add s, a[x+1]
+        else: raise newException(EInvalidValue, "invalid format string")
+        i = j+1
+      of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
+        var j = i+1
+        while formatstr[j] in PatternChars: inc(j)
+        var x = findNormalized(substr(formatstr, i+1, j-1), a)
+        if x >= 0 and x < high(a): add s, a[x+1]
+        else: raise newException(EInvalidValue, "invalid format string")
+        i = j
+      else:
+        raise newException(EInvalidValue, "invalid format string")
+    else:
+      add s, formatstr[i]
+      inc(i)
+
+proc `%` *(formatstr: string, a: openarray[string]): string {.noSideEffect,
+  rtl, extern: "nsuFormatOpenArray".} =
+  ## The `substitution`:idx: operator performs string substitutions in
+  ## `formatstr` and returns a modified `formatstr`. This is often called
+  ## `string interpolation`:idx:.
+  ##
+  ## This is best explained by an example:
+  ##
+  ## .. code-block:: nimrod
+  ##   "$1 eats $2." % ["The cat", "fish"]
+  ##
+  ## Results in:
+  ##
+  ## .. code-block:: nimrod
+  ##   "The cat eats fish."
+  ##
+  ## The substitution variables (the thing after the ``$``) are enumerated
+  ## from 1 to ``a.len``; ``$-1`` is the last one, ``$-2`` the one before it.
+  ## To produce a verbatim ``$``, use ``$$``.
+  ## The notation ``$#`` can be used to refer to the next substitution
+  ## variable:
+  ##
+  ## .. code-block:: nimrod
+  ##   "$# eats $#." % ["The cat", "fish"]
+  ##
+  ## Substitution variables can also be words (that is
+  ## ``[A-Za-z_]+[A-Za-z0-9_]*``) in which case the arguments in `a` with even
+  ## indices are keys and with odd indices are the corresponding values.
+  ## An example:
+  ##
+  ## .. code-block:: nimrod
+  ##   "$animal eats $food." % ["animal", "The cat", "food", "fish"]
+  ##
+  ## Results in:
+  ##
+  ## .. code-block:: nimrod
+  ##   "The cat eats fish."
+  ##
+  ## The variables are compared with `cmpIgnoreStyle`. `EInvalidValue` is
+  ## raised if an ill-formed format string has been passed to the `%` operator.
+  result = newStringOfCap(formatstr.len + a.len shl 4) # 16 bytes per argument
+  addf(result, formatstr, a)
+
+proc `%` *(formatstr, a: string): string {.noSideEffect,
+  rtl, extern: "nsuFormatSingleElem".} =
+  ## This is the same as ``formatstr % [a]``.
+  result = newStringOfCap(formatstr.len + a.len) # capacity for both strings
+  addf(result, formatstr, [a]) # `a` is the one and only argument
+
 {.pop.}

 when isMainModule:
@@ -1066,3 +1073,6 @@ when isMainModule:
  echo formatSize(1'i64 shl 31 + 300'i64) # == "4,GB"
  echo formatSize(1'i64 shl 31)

+  doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
+           "The cat eats fish."
+
--- a/lib/pure/subexes.nim
+++ b/lib/pure/subexes.nim
@@ -0,0 +1,380 @@
+#
+#
+#            Nimrod's Runtime Library
+#        (c) Copyright 2011 Andreas Rumpf
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Nimrod support for `substitution expressions`:idx: (`subex`:idx:).
+##
+## .. include:: ../doc/subexes.txt
+##
+
+{.push debugger:off .} # the user does not want to trace a part
+                       # of the standard library!
+
+from strutils import parseInt, cmpIgnoreStyle, Digits
+include "system/inclrtl"
+
+
+proc findNormalized(x: string, inArray: openarray[string]): int =
+  var i = 0 # only the even indices are compared with `x`
+  while i < high(inArray): # `<`: the very last entry is never tested
+    if cmpIgnoreStyle(x, inArray[i]) == 0: return i
+    inc(i, 2) # incrementing by 1 would probably lead to a
+              # security hole...
+  return -1 # nothing is equal to `x`
+
+type
+  EInvalidSubex* = object of EInvalidValue ## exception that is raised for an
+                                           ## invalid subex or argument index
+
+proc raiseInvalidFormat(msg: string) {.noinline.} =
+  raise newException(EInvalidSubex, "invalid format string: " & msg) # always
+
+type
+  TFormatParser = object {.pure, final.}
+    f: cstring # the subex that is interpreted
+    num, i, lineLen: int # next '#' argument; position in `f`; chars since '\L'
+
+template call(x: stmt) =
+  p.i = i # pass the local position of the call site on to the parser state
+  x
+  i = p.i # continue where `x` has stopped
+
+template callNoLineLenTracking(x: stmt) =
+  let oldLineLen = p.lineLen # saved so that `x` cannot affect the line length
+  p.i = i
+  x
+  i = p.i
+  p.lineLen = oldLineLen # discard the changes `x` made to the line length
+
+proc getFormatArg(p: var TFormatParser, a: openArray[string]): int =
+  ## Parses the argument reference at position `p.i` and returns the 0-based
+  ## index into `a` that it denotes. `p.i` is advanced past the reference.
+  ##
+  ## Accepted forms are ``#`` (next argument), a number (``1`` is the first
+  ## argument, ``-1`` the last one), an identifier (the index right after the
+  ## matching key is returned) and ``$ref`` (the argument denoted by ``ref``
+  ## is parsed as an integer and used as a 1-based index).
+  ##
+  ## `EInvalidSubex` is raised if none of these forms is found, if an
+  ## identifier is not a key in `a` or if the index is out of bounds.
+  const PatternChars = {'a'..'z', 'A'..'Z', '0'..'9', '\128'..'\255', '_'}
+  var i = p.i
+  var f = p.f
+  case f[i]
+  of '#':
+    result = p.num
+    inc i
+    inc p.num
+  of '1'..'9', '-':
+    var j = 0
+    var negative = f[i] == '-'
+    if negative: inc i
+    while f[i] in Digits:
+      j = j * 10 + ord(f[i]) - ord('0')
+      inc i
+    result = if not negative: j-1 else: a.len-j
+  of 'a'..'z', 'A'..'Z', '\128'..'\255', '_':
+    var name = ""
+    while f[i] in PatternChars:
+      name.add(f[i])
+      inc(i)
+    let key = findNormalized(name, a)
+    # without this check an unknown name would yield -1+1 == 0 and thus
+    # silently select the first argument:
+    if key < 0: raiseInvalidFormat("unknown identifier: " & name)
+    result = key+1
+  of '$':
+    inc(i)
+    call:
+      result = getFormatArg(p, a)
+    result = parseInt(a[result])-1
+  else:
+    raiseInvalidFormat("'#', '$', number or identifier expected")
+  # the unsigned comparison rejects negative indices too:
+  if result >=% a.len: raiseInvalidFormat("index out of bounds: " & $result)
+  p.i = i
+
+proc scanDollar(p: var TFormatParser, a: openarray[string], s: var string)
+
+proc emitChar(p: var TFormatParser, x: var string, ch: char) {.inline.} =
+  x.add(ch)
+  if ch == '\L': p.lineLen = 0 # a line feed starts a new line
+  else: inc p.lineLen
+
+proc emitStrLinear(p: var TFormatParser, x: var string, y: string) {.inline.} =
+  for ch in items(y): emitChar(p, x, ch) # per char, so '\L' resets lineLen
+
+proc emitStr(p: var TFormatParser, x: var string, y: string) {.inline.} =
+  x.add(y)
+  inc p.lineLen, y.len # `y` is not inspected: a '\L' in it goes unnoticed
+
+proc scanQuote(p: var TFormatParser, x: var string, toAdd: bool) =
+  var i = p.i+1 # skip the opening quote
+  var f = p.f
+  while true:
+    if f[i] == '\'':
+      inc i
+      if f[i] != '\'': break # a single quote ends the literal
+      inc i # two quotes in a row stand for one verbatim quote
+      if toAdd: emitChar(p, x, '\'')
+    elif f[i] == '\0': raiseInvalidFormat("closing \"'\" expected")
+    else:
+      if toAdd: emitChar(p, x, f[i]) # with `toAdd == false` it is only skipped
+      inc i
+  p.i = i # the first char after the closing quote
+
+proc scanBranch(p: var TFormatParser, a: openArray[string],
+                x: var string, choice: int) =
+  var i = p.i
+  var f = p.f
+  var c = 0 # number of the branch that is scanned
+  var elsePart = i # start of the most recent branch
+  var toAdd = choice == 0 # is the current branch the selected one?
+  while true:
+    case f[i]
+    of ']': break
+    of '|': # the next branch begins
+      inc i
+      elsePart = i
+      inc c
+      if toAdd: break # the selected branch is complete
+      toAdd = choice == c
+    of '\'':
+      call: scanQuote(p, x, toAdd)
+    of '\0': raiseInvalidFormat("closing ']' expected")
+    else:
+      if toAdd:
+        if f[i] == '$':
+          inc i
+          call: scanDollar(p, a, x)
+        else:
+          emitChar(p, x, f[i])
+          inc i
+      else:
+        inc i # not selected: skipped without being interpreted
+  if not toAdd and choice >= 0:
+    # evaluate 'else' part:
+    var last = i
+    i = elsePart # the last branch serves as the 'else' part
+    while true:
+      case f[i]
+      of '|', ']': break
+      of '\'':
+        call: scanQuote(p, x, true)
+      of '$':
+        inc i
+        call: scanDollar(p, a, x)
+      else:
+        emitChar(p, x, f[i])
+        inc i
+    i = last
+  p.i = i+1 # after the ']' if the first loop ran up to it
+
+proc scanSlice(p: var TFormatParser, a: openarray[string]): tuple[x, y: int] =
+  var slice = false # becomes true once '..' has been seen
+  var i = p.i
+  var f = p.f
+  
+  if f[i] == '{': inc i
+  else: raiseInvalidFormat("'{' expected")
+  if f[i] == '.' and f[i+1] == '.': # no lower bound: `result.x` stays 0
+    inc i, 2
+    slice = true
+  else:
+    call: result.x = getFormatArg(p, a)
+    if f[i] == '.' and f[i+1] == '.':
+      inc i, 2
+      slice = true
+  if slice:
+    if f[i] != '}':
+      call: result.y = getFormatArg(p, a)
+    else:
+      result.y = high(a) # no upper bound: up to the last argument
+  else:
+    result.y = result.x # without '..' the slice is the single argument x
+  if f[i] != '}': raiseInvalidFormat("'}' expected")
+  inc i
+  p.i = i
+  
+proc scanDollar(p: var TFormatParser, a: openarray[string], s: var string) =
+  ## Interprets the construct that follows a ``$`` (`p.i` has to point to the
+  ## first char after the ``$``) and appends its expansion to `s`. Afterwards
+  ## `p.i` points to the first char after the construct.
+  ##
+  ## Handled constructs: ``$$``, ``${slice}``, ``$[a|b|else]arg``,
+  ## ``$'sep'~{slice}``, ``$'sep'{slice}`` with an optional line breaking
+  ## specifier in front of the slice (``80c'indent'`` or ``8i'indent'``) and
+  ## every plain argument reference that `getFormatArg` accepts.
+  ##
+  ## A join over an empty slice (for instance ``{..}`` when `a` is empty)
+  ## expands to nothing.
+  var i = p.i
+  var f = p.f
+  case f[i]
+  of '$':
+    emitChar p, s, '$'
+    inc i
+  of '{':
+    call:
+      let (x, y) = scanSlice(p, a)
+    for j in x..y: emitStr p, s, a[j]
+  of '[':
+    inc i
+    var start = i
+    # first pass: the choice -1 selects no branch, so this only skips to the
+    # char after the closing ']':
+    call: scanBranch(p, a, s, -1)
+    var x: int
+    if f[i] == '{':
+      inc i
+      call: x = getFormatArg(p, a)
+      if f[i] != '}': raiseInvalidFormat("'}' expected")
+      inc i
+    else:
+      call: x = getFormatArg(p, a)
+    var last = i
+    let choice = parseInt(a[x])
+    # second pass: scan the branches again, emit the selected one and then
+    # continue after the selector argument:
+    i = start
+    call: scanBranch(p, a, s, choice)
+    i = last
+  of '\'':
+    var sep = ""
+    callNoLineLenTracking: scanQuote(p, sep, true)
+    if f[i] == '~':
+      # $' '~{1..3}
+      # insert space followed by 1..3 if not empty
+      inc i
+      call:
+        let (x, y) = scanSlice(p, a)
+      var L = 0
+      for j in x..y: inc L, a[j].len
+      if L > 0:
+        emitStrLinear p, s, sep
+        for j in x..y: emitStr p, s, a[j]
+    else:
+      block StringJoin:
+        block OptionalLineLengthSpecifier:
+          var maxLen = 0
+          case f[i]
+          of '0'..'9':
+            while f[i] in Digits:
+              maxLen = maxLen * 10 + ord(f[i]) - ord('0')
+              inc i
+          of '$':
+            # The argument after the '$' holds the limit. `getFormatArg` must
+            # not see the '$': it would treat the parsed number as an argument
+            # index (minus one, bounds checked) instead of as a plain value.
+            inc i
+            var limitArg: int
+            call: limitArg = getFormatArg(p, a)
+            maxLen = parseInt(a[limitArg])
+          else: break OptionalLineLengthSpecifier
+          var indent = ""
+          case f[i]
+          of 'i':
+            inc i
+            callNoLineLenTracking: scanQuote(p, indent, true)
+
+            call:
+              let (x, y) = scanSlice(p, a)
+            if x <= y: # an empty slice produces no output at all
+              if maxLen < 1: emitStrLinear(p, s, indent)
+              var items = 1
+              emitStr p, s, a[x]
+              for j in x+1..y:
+                emitStr p, s, sep
+                if items >= maxLen:
+                  # `maxLen` items are on this line; start a new one:
+                  emitStrLinear p, s, indent
+                  items = 0
+                emitStr p, s, a[j]
+                inc items
+          of 'c':
+            inc i
+            callNoLineLenTracking: scanQuote(p, indent, true)
+
+            call:
+              let (x, y) = scanSlice(p, a)
+            if x <= y: # an empty slice produces no output at all
+              # break the line *before* an item would exceed `maxLen` chars:
+              if p.lineLen + a[x].len > maxLen: emitStrLinear(p, s, indent)
+              emitStr p, s, a[x]
+              for j in x+1..y:
+                emitStr p, s, sep
+                if p.lineLen + a[j].len > maxLen: emitStrLinear(p, s, indent)
+                emitStr p, s, a[j]
+
+          else: raiseInvalidFormat("unit 'c' (chars) or 'i' (items) expected")
+          break StringJoin
+
+        # plain join without a line length specifier:
+        call:
+          let (x, y) = scanSlice(p, a)
+        if x <= y: # an empty slice produces no output at all
+          emitStr p, s, a[x]
+          for j in x+1..y:
+            emitStr p, s, sep
+            emitStr p, s, a[j]
+  else:
+    call:
+      var x = getFormatArg(p, a)
+    emitStr p, s, a[x]
+  p.i = i
+
+
+type
+  TSubex* = distinct string ## string that contains a substitution expression
+
+proc subex*(s: string): TSubex =
+  ## constructs a *substitution expression* from `s`. Currently this performs
+  ## no syntax checking but this may change in later versions.
+  result = TSubex(s) # a mere type conversion; `s` is not inspected
+
+proc addf*(s: var string, formatstr: TSubex, a: openarray[string]) {.
+           noSideEffect, rtl, extern: "nfrmtAddf".} =
+  ## The same as ``add(s, formatstr % a)``, but more efficient.
+  var p: TFormatParser # `num` and `lineLen` are not set here
+  p.f = formatstr.string
+  var i = 0
+  while i < len(formatstr.string):
+    if p.f[i] == '$':
+      inc i # `scanDollar` starts at the char after the '$'
+      call: scanDollar(p, a, s)
+    else:
+      emitChar(p, s, p.f[i]) # everything else is copied verbatim
+      inc(i)
+
+proc `%` *(formatstr: TSubex, a: openarray[string]): string {.noSideEffect,
+  rtl, extern: "nfrmtFormatOpenArray".} =
+  ## The `substitution`:idx: operator performs string substitutions in
+  ## `formatstr` and returns the resulting string. This is often called
+  ## `string interpolation`:idx:.
+  ## See the documentation of this module for the supported notation.
+  result = newStringOfCap(formatstr.string.len + a.len shl 4) # 16 bytes per arg
+  addf(result, formatstr, a)
+
+proc `%` *(formatstr: TSubex, a: string): string {.noSideEffect,
+  rtl, extern: "nfrmtFormatSingleElem".} =
+  ## This is the same as ``formatstr % [a]``.
+  result = newStringOfCap(formatstr.string.len + a.len)
+  addf(result, formatstr, [a]) # `a` is the one and only argument
+
+{.pop.}
+
+when isMainModule:
+
+  proc `%`(formatstr: string, a: openarray[string]): string =
+    result = newStringOfCap(formatstr.len + a.len shl 4)
+    addf(result, formatstr.TSubex, a) # interpret the plain string as a subex
+
+  proc `%`(formatstr: string, a: string): string =
+    result = newStringOfCap(formatstr.len + a.len)
+    addf(result, formatstr.TSubex, [a]) # interpret the plain string as a subex
+
+
+  doAssert "$# $3 $# $#" % ["a", "b", "c"] == "a c b c"
+  doAssert "$animal eats $food." % ["animal", "The cat", "food", "fish"] ==
+           "The cat eats fish."
+
+
+  doAssert "$[abc|def]# $3 $# $#" % ["17", "b", "c"] == "def c b c"
+  doAssert "$[abc|def]# $3 $# $#" % ["1", "b", "c"] == "def c b c"
+  doAssert "$[abc|def]# $3 $# $#" % ["0", "b", "c"] == "abc c b c"
+  doAssert "$[abc|def|]# $3 $# $#" % ["17", "b", "c"] == " c b c"
+
+  doAssert "$[abc|def|]# $3 $# $#" % ["-9", "b", "c"] == " c b c"
+  doAssert "$1($', '{2..})" % ["f", "a", "b"] == "f(a, b)"
+
+  doAssert "$[$1($', '{2..})|''''|fg'$3']1" % ["7", "a", "b"] == "fg$3"
+  
+  doAssert "$[$#($', '{#..})|''''|$3]1" % ["0", "a", "b"] == "0(a, b)"
+  doAssert "$' '~{..}" % "" == ""
+  doAssert "$' '~{..}" % "P0" == " P0"
+  doAssert "${$1}" % "1" == "1"
+  doAssert "${$$-1} $$1" % "1" == "1 $1"
+           
+  doAssert "$#($', '10c'\n    '{#..})" % ["doAssert", "longishA", "longish"] ==
+           """doAssert(
+    longishA, 
+    longish)"""
+  
+  echo "type TMyEnum* = enum\n  $', '2i'\n  '{..}" % ["fieldA", 
+    "fieldB", "FiledClkad", "fieldD", "fieldE", "longishFieldName"]
+  
+  doAssert subex"$1($', '{2..})" % ["f", "a", "b", "c"] == "f(a, b, c)"
+  
+  doAssert subex"$1 $[files|file|files]{1} copied" % ["1"] == "1 file copied"
+  
+  doAssert subex"$['''|'|''''|']']#" % "0" == "'|"
+  
+  echo subex("type\n  TEnum = enum\n    $', '40c'\n    '{..}") % [
+    "fieldNameA", "fieldNameB", "fieldNameC", "fieldNameD"]
+  
+  
--- a/tests/compile/tlibs.nim
+++ b/tests/compile/tlibs.nim
@@ -15,7 +15,7 @@ import
  lua, lualib, lauxlib, mysql, sqlite3, python, tcl,
  db_postgres, db_mysql, db_sqlite, ropes, sockets, browsers, httpserver,
  httpclient, parseutils, unidecode, xmldom, xmldomparser, xmltree, xmlparser,
-  htmlparser, re, graphics, colors, pegs
+  htmlparser, re, graphics, colors, pegs, subexes
  
 when defined(linux):
  import
--- a/todo.txt
+++ b/todo.txt
@@ -1,6 +1,7 @@
 version 0.8.14
 ==============

+- document subexes; i18n module
 - GC should care about interior pointers on the stack
 - BUG: type TX = TTable[string, int]
 - warning for implicit openArray -> varargs conversion
--- a/web/news.txt
+++ b/web/news.txt
@@ -140,6 +140,7 @@ Library Additions
 - Added ``irc`` module.
 - Added ``ftpclient`` module.
 - Added ``memfiles`` module.
+- Added ``subexes`` module.
 - Added ``osproc.startCmd``, ``osproc.execCmdEx``.
 - The ``osproc`` module now uses ``posix_spawn`` instead of ``fork`` 
  and ``exec`` on Posix systems. Define the symbol ``useFork`` to revert to
--- a/web/nimrod.ini
+++ b/web/nimrod.ini
@@ -42,7 +42,7 @@ srcdoc: "impure/rdstdin;wrappers/zmq;wrappers/sphinx"
 srcdoc: "pure/collections/tables;pure/collections/sets;pure/collections/lists"
 srcdoc: "pure/collections/intsets;pure/collections/queues;pure/encodings"
 srcdoc: "pure/events;pure/collections/sequtils;pure/irc;ecmas/dom"
-srcdoc: "pure/ftpclient;pure/memfiles"
+srcdoc: "pure/ftpclient;pure/memfiles;pure/subexes"

 webdoc: "wrappers/libcurl;pure/md5;wrappers/mysql;wrappers/iup"
 webdoc: "wrappers/sqlite3;wrappers/postgres;wrappers/tinyc"