parseopt: keep the seq of arguments as given; fixes various command line parsing edge cases; refs #6818

2026-07-26 10:32:01 +00:00 · 2018-08-28 09:39:24 +02:00
parent 9c3cba1c22
commit 6f16166c60
1 changed files with 75 additions and 34 deletions
--- a/lib/pure/parseopt.nim
+++ b/lib/pure/parseopt.nim
@@ -49,20 +49,24 @@ type
    inShortState: bool
    shortNoVal: set[char]
    longNoVal: seq[string]
+    cmds: seq[string]
+    idx: int
    kind*: CmdLineKind        ## the detected command line token
    key*, val*: TaintedString ## key and value pair; ``key`` is the option
                              ## or the argument, ``val`` is not "" if
                              ## the option was given a value

 proc parseWord(s: string, i: int, w: var string,
-               delim: set[char] = {'\x09', ' '}): int =
+               delim: set[char] = {'\t', ' '}): int =
  result = i
  if result < s.len and s[result] == '\"':
    inc(result)
-    while result < s.len and s[result] != '\"':
+    while result < s.len:
+      if s[result] == '"':
+        inc result
+        break
      add(w, s[result])
      inc(result)
-    if result < s.len and s[result] == '\"': inc(result)
  else:
    while result < s.len and s[result] notin delim:
      add(w, s[result])
@@ -73,7 +77,7 @@ when declared(os.paramCount):
    if find(s, {' ', '\t'}) >= 0 and s.len > 0 and s[0] != '"':
      if s[0] == '-':
        result = newStringOfCap(s.len)
-        var i = parseWord(s, 0, result, {' ', '\x09', ':', '='})
+        var i = parseWord(s, 0, result, {' ', '\t', ':', '='})
        if i < s.len and s[i] in {':','='}:
          result.add s[i]
          inc i
@@ -100,16 +104,21 @@ when declared(os.paramCount):
    ## (though they still need at least a space).  In both cases, ':' or '='
    ## may still be used if desired.  They just become optional.
    result.pos = 0
+    result.idx = 0
    result.inShortState = false
    result.shortNoVal = shortNoVal
    result.longNoVal = longNoVal
    if cmdline != "":
      result.cmd = cmdline
+      result.cmds = parseCmdLine(cmdline)
    else:
      result.cmd = ""
+      result.cmds = newSeq[string](paramCount())
      for i in countup(1, paramCount()):
-        result.cmd.add quote(paramStr(i).string)
+        result.cmds[i-1] = paramStr(i).string
+        result.cmd.add quote(result.cmds[i-1])
        result.cmd.add ' '
+
    result.kind = cmdEnd
    result.key = TaintedString""
    result.val = TaintedString""
@@ -120,80 +129,111 @@ when declared(os.paramCount):
    ## (as provided by the ``OS`` module) is taken. ``shortNoVal`` and
    ## ``longNoVal`` behavior is the same as for ``initOptParser(string,...)``.
    result.pos = 0
+    result.idx = 0
    result.inShortState = false
    result.shortNoVal = shortNoVal
    result.longNoVal = longNoVal
    result.cmd = ""
    if cmdline.len != 0:
+      result.cmds = newSeq[string](cmdline.len)
      for i in 0..<cmdline.len:
+        result.cmds[i] = cmdline[i]
        result.cmd.add quote(cmdline[i].string)
        result.cmd.add ' '
    else:
+      result.cmds = newSeq[string](paramCount())
      for i in countup(1, paramCount()):
-        result.cmd.add quote(paramStr(i).string)
+        result.cmds[i-1] = paramStr(i).string
+        result.cmd.add quote(result.cmds[i-1])
        result.cmd.add ' '
    result.kind = cmdEnd
    result.key = TaintedString""
    result.val = TaintedString""

-proc handleShortOption(p: var OptParser) =
+proc handleShortOption(p: var OptParser; cmd: string) =
  var i = p.pos
  p.kind = cmdShortOption
-  add(p.key.string, p.cmd[i])
+  add(p.key.string, cmd[i])
  inc(i)
  p.inShortState = true
-  while i < p.cmd.len and p.cmd[i] in {'\x09', ' '}:
+  while i < cmd.len and cmd[i] in {'\t', ' '}:
    inc(i)
    p.inShortState = false
-  if i < p.cmd.len and p.cmd[i] in {':', '='} or
+  if i < cmd.len and cmd[i] in {':', '='} or
      card(p.shortNoVal) > 0 and p.key.string[0] notin p.shortNoVal:
-    if i < p.cmd.len and p.cmd[i] in {':', '='}:
+    if i < cmd.len and cmd[i] in {':', '='}:
      inc(i)
    p.inShortState = false
-    while i < p.cmd.len and p.cmd[i] in {'\x09', ' '}: inc(i)
-    i = parseWord(p.cmd, i, p.val.string)
-  if i >= p.cmd.len: p.inShortState = false
-  p.pos = i
+    while i < cmd.len and cmd[i] in {'\t', ' '}: inc(i)
+    p.val = substr(cmd, i)
+    p.pos = 0
+    inc p.idx
+  else:
+    p.pos = i
+  if i >= cmd.len:
+    p.inShortState = false
+    p.pos = 0
+    inc p.idx

 proc next*(p: var OptParser) {.rtl, extern: "npo$1".} =
  ## parses the first or next option; ``p.kind`` describes what token has been
  ## parsed. ``p.key`` and ``p.val`` are set accordingly.
+  if p.idx >= p.cmds.len:
+    p.kind = cmdEnd
+    return
+
  var i = p.pos
-  while i < p.cmd.len and p.cmd[i] in {'\x09', ' '}: inc(i)
+  while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i)
  p.pos = i
  setLen(p.key.string, 0)
  setLen(p.val.string, 0)
  if p.inShortState:
-    handleShortOption(p)
-    return
-  if i >= p.cmd.len:
-    p.kind = cmdEnd
-    return
-  if p.cmd[i] == '-':
+    p.inShortState = false
+    if i >= p.cmds[p.idx].len:
+      inc(p.idx)
+      p.pos = 0
+      if p.idx >= p.cmds.len:
+        p.kind = cmdEnd
+        return
+    else:
+      handleShortOption(p, p.cmds[p.idx])
+      return
+
+  if i < p.cmds[p.idx].len and p.cmds[p.idx][i] == '-':
    inc(i)
-    if i < p.cmd.len and p.cmd[i] == '-':
+    if i < p.cmds[p.idx].len and p.cmds[p.idx][i] == '-':
      p.kind = cmdLongOption
      inc(i)
-      i = parseWord(p.cmd, i, p.key.string, {' ', '\x09', ':', '='})
-      while i < p.cmd.len and p.cmd[i] in {'\x09', ' '}: inc(i)
-      if i < p.cmd.len and p.cmd[i] in {':', '='} or
-          len(p.longNoVal) > 0 and p.key.string notin p.longNoVal:
-        if i < p.cmd.len and p.cmd[i] in {':', '='}:
+      i = parseWord(p.cmds[p.idx], i, p.key.string, {' ', '\t', ':', '='})
+      while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i)
+      if i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {':', '='}:
+        if i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {':', '='}:
          inc(i)
-        while i < p.cmd.len and p.cmd[i] in {'\x09', ' '}: inc(i)
-        p.pos = parseWord(p.cmd, i, p.val.string)
+        while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i)
+        p.val = p.cmds[p.idx].substr(i)
+      elif len(p.longNoVal) > 0 and p.key.string notin p.longNoVal and p.idx+1 < p.cmds.len:
+        p.val = p.cmds[p.idx+1]
+        inc p.idx
      else:
-        p.pos = i
+        p.val = ""
+      inc p.idx
+      p.pos = 0
    else:
      p.pos = i
-      handleShortOption(p)
+      handleShortOption(p, p.cmds[p.idx])
  else:
    p.kind = cmdArgument
-    p.pos = parseWord(p.cmd, i, p.key.string)
+    p.key = p.cmds[p.idx]
+    inc p.idx
+    p.pos = 0

 proc cmdLineRest*(p: OptParser): TaintedString {.rtl, extern: "npo$1".} =
  ## retrieves the rest of the command line that has not been parsed yet.
-  result = strip(substr(p.cmd, p.pos, len(p.cmd) - 1)).TaintedString
+  var res = ""
+  for i in p.idx..<p.cmds.len:
+    if i > p.idx: res.add ' '
+    res.add quote(p.cmds[i])
+  result = res.TaintedString

 iterator getopt*(p: var OptParser): tuple[kind: CmdLineKind, key, val: TaintedString] =
  ## This is a convenience iterator for iterating over the given OptParser object.
@@ -214,6 +254,7 @@ iterator getopt*(p: var OptParser): tuple[kind: CmdLineKind, key, val: TaintedSt
  ##     # no filename has been given, so we show the help:
  ##     writeHelp()
  p.pos = 0
+  p.idx = 0
  while true:
    next(p)
    if p.kind == cmdEnd: break