From 7c873ca61584d6d70d63dd017f9d5fe0a2c103f3 Mon Sep 17 00:00:00 2001 From: Zoom Date: Mon, 16 Feb 2026 19:06:18 +0400 Subject: [PATCH] Feat: std: parseopt parser modes (#25506) Adds configurable parser modes to std/parseopt module. **Take two.** Initially solved the issue of not being able to pass arguments to short options as you do with most everyday CLI programs, but reading the tests made me add more features so that some of the behaviour could be changed and here we are. **`std/parseopt` now supports three parser modes** via an optional `mode` parameter in `initOptParser` and `getopt`. Three modes are provided: - `NimMode` (default, fully backward compatible), - `LaxMode` (POSIX-inspired with relaxed short option handling), - `GnuMode` (stricter GNU-style conventions). The new modes are marked as experimental in the documentation. The parser behaviour is controlled by a new `ParserRules` enum, which provides granular feature flags that modes are built from. This makes it possible for users with specific requirements to define custom rule sets by importing private symbols, this is mentioned but clearly marked as unsupported. **Backward compatibility:** The default mode preserves existing behaviour completely, with a single exception: `allowWhitespaceAfterColon` is deprecated. Now, `allowWhitespaceAfterColon` doesn't make much sense as a single tuning knob. The `ParserRule.prSepAllowDelimAfter` controls this now. As `allowWhitespaceAfterColon` had a default, most calls never mention it so they will silently migrate to the new `initOptParser` overload. To cover cases when the proc param was used at call-site, I added an overload, which modifies the default parser mode to reflect the required `allowWhitespaceAfterColon` value. Should be all smooth for most users, except the deprecation warning. 
The only thing I think can be classified as a breaking change is a surprising **bug** of the old parser: ```nim let p = initOptParser("-n 10 -m20 -k= 30 -40", shortNoVal = {'v'}) # ^-disappears ``` This is with the aforementioned `allowWhitespaceAfterColon` being true by default, of course. In this case the `30` token is skipped completely. I don't think that's right, so it's fixed. Things I still don't like about how the old parser and the new default mode behave: 1. **Parser behaviour is controlled by the emptiness of two containers**. This is an interesting approach. It's also made more interesting because the `shortNoVal`/`longNoVal` control not only their namesakes, but *also how their opposites (value-taking opts) work*. --- **Edit:** 2. `shortNoVal` is not mandatory: ```nim let p = initOptParser(@["-a=foo"], shortNoVal = {'a'}) # Nim, Lax parses as: (cmdShortOption, "a", "foo") # GnuMode parses as: (cmdShortOption, "a", "=foo") ``` In this case, even though the user specified `a` as no-val, the parser ignores it, relying only on the syntax to decide the kind of the argument. This is especially problematic with the modes that don't use the rule `prShortAllowSep` (GnuMode); in this case the provided input is twice invalid, regardless of the `shortNoVal`. With the current parser architecture, parsing it this way **is inevitable**, though. We don't have any way to signal the error state detected with the input, so the user is expected to validate the input for mistakes. Bundling positional arguments is nonsensical and short options can't use the separator character, so `[cmd "a", arg "=foo"]` and `[cmd "a", cmd "=", cmd "f"...]` are both out of the question **and** would complicate validating, requiring keeping track of a previous argument. Hope I'm clear enough on the issue. **Future work:** 1. 
Looks like the new modes are already usable, but from the discussions elsewhere it looks like we might want to support special-casing multi-digit short options (`-XX..`) to allow numerical options greater than 9. This complicates bundling, though, so requires a bit of thinking through. 2. Signaling error state? --------- Co-authored-by: Andreas Rumpf --- changelog.md | 4 + lib/pure/parseopt.nim | 655 +++++++++++++++++++++++++--------- tests/misc/tparseoptmodes.nim | 508 ++++++++++++++++++++++++++ 3 files changed, 996 insertions(+), 171 deletions(-) create mode 100644 tests/misc/tparseoptmodes.nim diff --git a/changelog.md b/changelog.md index 2a9c8aabf2..8d59320672 100644 --- a/changelog.md +++ b/changelog.md @@ -61,6 +61,10 @@ errors. - `system.setLenUninit` now supports refc, JS and VM backends. +- `std/parseopt` now supports multiple parser modes via a `CliMode` enum. + Modes include `Nim` (default, fully compatible) and two new experimental modes: + `Lax` and `Gnu` for different option parsing behaviors. + [//]: # "Changes:" - `std/math` The `^` symbol now supports floating-point as exponent in addition to the Natural type. diff --git a/lib/pure/parseopt.nim b/lib/pure/parseopt.nim index 27b38d904b..5f6a82e4a5 100644 --- a/lib/pure/parseopt.nim +++ b/lib/pure/parseopt.nim @@ -14,6 +14,11 @@ ## Supported Syntax ## ================ ## +## The parser supports multiple `parser modes<#parser-modes>`_ that affect how +## options are interpreted. The syntax described here applies to the default +## `Nim` mode. See `Parser Modes<#parser-modes>`_ for details on alternative +## modes and their differences. +## ## The following syntax is supported when arguments for the `shortNoVal` and ## `longNoVal` parameters, which are ## `described later<#nimshortnoval-and-nimlongnoval>`_, are not provided: @@ -26,11 +31,12 @@ ## `CmdLineKind enum<#CmdLineKind>`_. 
## ## When option values begin with ':' or '=', they need to be doubled up (as in -## `--delim::`) or alternated (as in `--delim=:`). +## `--foo::`) or alternated (as in `--foo=:`). ## ## The `--` option, commonly used to denote that every token that follows is ## an argument, is interpreted as a long option, and its name is the empty -## string. +## string. Trailing arguments can be accessed with `remainingArgs<#remainingArgs,OptParser>`_ +## or `cmdLineRest<#cmdLineRest,OptParser>`_. ## ## Parsing ## ======= @@ -48,30 +54,30 @@ ## ## Here is an example: ## -## ```Nim -## import std/parseopt -## -## var p = initOptParser("-ab -e:5 --foo --bar=20 file.txt") -## while true: -## p.next() -## case p.kind -## of cmdEnd: break -## of cmdShortOption, cmdLongOption: -## if p.val == "": -## echo "Option: ", p.key -## else: -## echo "Option and value: ", p.key, ", ", p.val -## of cmdArgument: -## echo "Argument: ", p.key -## -## # Output: -## # Option: a -## # Option: b -## # Option and value: e, 5 -## # Option: foo -## # Option and value: bar, 20 -## # Argument: file.txt -## ``` +runnableExamples: + + var p = initOptParser("-ab -e:5 --foo --bar=20 file.txt") + var output: seq[string] = @[] + while true: + p.next() + case p.kind + of cmdEnd: break + of cmdShortOption, cmdLongOption: + if p.val == "": + output.add("Option: " & p.key) + else: + output.add("Option and value: " & p.key & ", " & p.val) + of cmdArgument: + output.add("Argument: " & p.key) + + doAssert output == @[ + "Option: a", + "Option: b", + "Option and value: e, 5", + "Option: foo", + "Option and value: bar, 20", + "Argument: file.txt" + ] ## ## The `getopt iterator<#getopt.i,OptParser>`_, which is provided for ## convenience, can be used to iterate through all command line options as well. 
@@ -82,22 +88,23 @@ ## ## Here is an example: ## -## ```Nim -## import std/parseopt -## -## var varName: string = "defaultValue" -## -## for kind, key, val in getopt(): -## case kind -## of cmdArgument: -## discard -## of cmdLongOption, cmdShortOption: -## case key: -## of "varName": # --varName: in the console when executing -## varName = val # do input sanitization in production systems -## of cmdEnd: -## discard -## ``` +runnableExamples: + import std/strutils + + var varName: string = "defaultValue" + + for kind, key, val in getopt(@["--varName:HELLO"]): + case kind + of cmdArgument: + discard + of cmdLongOption, cmdShortOption: + case key + of "varName": # --varName: in the console when executing + varName = val.toLowerAscii() # do input sanitization in production + of cmdEnd: + discard + + doAssert varName == "hello" ## ## `shortNoVal` and `longNoVal` ## ============================ @@ -107,56 +114,198 @@ ## specifying which short and long options do not accept values. ## ## When `shortNoVal` is non-empty, users are not required to separate short -## options and their values with a ':' or '=' since the parser knows which +## options and their values with a `:` or `=` since the parser knows which ## options accept values and which ones do not. This behavior also applies for -## long options if `longNoVal` is non-empty. For short options, `-j4` -## becomes supported syntax, and for long options, `--foo bar` becomes -## supported. This is in addition to the `previously mentioned -## syntax<#supported-syntax>`_. Users can still separate options and their -## values with ':' or '=', but that becomes optional. +## long options if `longNoVal` is non-empty. +## +## For short options, `-j4` becomes supported syntax (parsed as option `j` with +## value `4` instead of two separate options `j` and `4`). For long options, +## `--foo bar` becomes supported syntax in all modes. 
In `LaxMode` and `GnuMode` +## modes, short options can also take values from the next argument (e.g., +## `-c val`), but this does **not** work in the default `Nim` mode. +## +## This is in addition to the `previously mentioned syntax<#supported-syntax>`_. +## Users can still separate options and their values with `:` or `=`, but that +## becomes optional. ## ## As more options which do not accept values are added to your program, ## remember to amend `shortNoVal` and `longNoVal` accordingly. ## +## The parser does not validate the input for syntax mistakes, thus, options +## can still have values if passed explicitly by the user, even when they are +## marked as `shortNoVal`/`longNoVal`. +## +## This behavior allows associating an option with the mistakenly passed value: +## +runnableExamples: + import std/[sequtils, os] + + let cmds = "-n:9 --foo:bar".parseCmdLine() + let parsed = toSeq(cmds.getopt(shortNoVal = {'n'}, longNoVal = @["foo"])) + for (kind, key, val) in parsed: + case kind + of cmdEnd: raise newException(AssertionDefect, "Unreachable") + of cmdShortOption, cmdLongOption: + if key in ["n", "foo"] and val != "": + # Substitute for proper error handling in your code + discard "Option " & key & " can't take values!" + else: discard + of cmdArgument: discard + doAssert parsed == @[ + (cmdShortOption, "n", "9"), + (cmdLongOption, "foo", "bar")] +## +## .. Important:: +## Next-argument value-taking for short/long options is only enabled when +## `shortNoVal`/`longNoVal` are non-empty. If your program has *no* options +## that take no value, you still must pass a non-empty placeholder (for example, +## `shortNoVal = {'\0'}` and/or `longNoVal = @[""]`) to enable this form. 
+## ## The following example illustrates the difference between having an empty ## `shortNoVal` and `longNoVal`, which is the default, and providing ## arguments for those two parameters: ## -## ```Nim -## import std/parseopt +runnableExamples: + + proc format(kind: CmdLineKind; key, val: string): string = + case kind + of cmdEnd: raise newException(AssertionDefect, "Unreachable") + of cmdShortOption, cmdLongOption: + if val == "": "Option: " & key + else: "Option and value: " & key & ", " & val + of cmdArgument: "Argument: " & key + + let cmdLine = "-j4 --first bar" + var output1, output2: seq[string] = @[] + + var emptyNoVal = initOptParser(cmdLine) + for kind, key, val in emptyNoVal.getopt(): + output1.add format(kind, key, val) + + doAssert output1 == @[ + "Option: j", + "Option: 4", + "Option: first", + "Argument: bar" + ] + + var withNoVal = cmdLine.initOptParser(shortNoVal = {'c'}, + longNoVal = @["second"]) + for kind, key, val in withNoVal.getopt(): + output2.add format(kind, key, val) + + doAssert output2 == @[ + "Option and value: j, 4", + "Option and value: first, bar" + ] ## -## proc printToken(kind: CmdLineKind, key: string, val: string) = -## case kind -## of cmdEnd: doAssert(false) # Doesn't happen with getopt() -## of cmdShortOption, cmdLongOption: -## if val == "": -## echo "Option: ", key -## else: -## echo "Option and value: ", key, ", ", val -## of cmdArgument: -## echo "Argument: ", key +## Parser Modes +## ============ ## -## let cmdLine = "-j4 --first bar" +## .. Warning:: Modes other than the default (`Nim`) are **experimental** and may +## change in future releases. ## -## var emptyNoVal = initOptParser(cmdLine) -## for kind, key, val in emptyNoVal.getopt(): -## printToken(kind, key, val) +## The parser supports several distinct rule sets that change how options are +## interpreted: ## -## # Output: -## # Option: j -## # Option: 4 -## # Option: first -## # Argument: bar +## 1. 
**LaxMode**: Most forgiving mode, combines `Nim` with POSIX-like +## short option handling. Tries to follow the POSIX_ guidelines where possible. +## 2. **NimMode**: Standard Nim parsing rules (default). +## 3. **GnuMode**: GNU-inspired parsing (e.g. `=` as the only delimiter). +## Puts some additional restrictions, following some of the GNU_ conventions. ## -## var withNoVal = initOptParser(cmdLine, shortNoVal = {'c'}, -## longNoVal = @["second"]) -## for kind, key, val in withNoVal.getopt(): -## printToken(kind, key, val) +## Modes are ordered from most relaxed to strictest. The names were +## chosen to set general user expectations and full compliance is neither +## achieved nor planned. ## -## # Output: -## # Option and value: j, 4 -## # Option and value: first, bar -## ``` +## Mode Differences +## ---------------- +## +## **NimMode** (default): +## +## - Short options require adjacent values or explicit delimiters: +## `-cval`, `-c:val`, `-c=val` +## - Short options follow POSIX-style bundling rules +## - Next-argument value taking (`-c val`) is **not** supported by default +## - Supports both `:` and `=` as delimiters +## - Allows whitespace around delimiters +## - Values starting with `-` are interpreted as new options +## +## **LaxMode**: +## +## - Essentially the Nim mode with some relaxations for short options: +## + Allows short options to take values from the next argument: `-c val` +## + Supports bundled short options with trailing value: `-abc val` +## - Values starting with `-` can be consumed as option arguments +## +## **GnuMode**: +## +## - Only `=` is treated as a delimiter (`:` is not a delimiter) +## - No whitespace allowed around `=` +## - Short options can take next-argument values (`-c val`), but only whitespace +## is allowed as a delimiter, separators parse as part of the value +## - Short options follow POSIX-style bundling rules +## - Values starting with `-` can be consumed as option arguments +## - Known discrepancies compared to GNU 
getopt: +## + No notion of optional/mandatory arguments, colon (`:`) doesn't +## indicate them and overall is not a special character. +## +## Mode-Specific Behavior +## ====================== +## +## The parser's behavior varies significantly between modes, particularly +## around how options consume their values: +## +## **Short Options** +## +## Consider `-c val`: +## +## - In `Nim` mode: `-c` is parsed as an option without a value, and `val` is +## parsed as an argument, regardless of `shortNoVal` being empty or not. +## - In `LaxMode` and `Gnu` modes: same as `Nim` when `shortNoVal` is +## empty or contains `c`; otherwise, `val` is consumed as the value. +## +## Consider `-c -10`: +## +## - If `shortNoVal` value is empty, all three modes parse three separate short +## options: `c`, `1` and `0`. +## - Otherwise, if `-c` is not in `shortNoVal`: +## + `Nim`: `-c` is an option without an argument. `-10` is interpreted as +## an option `-1` with the `0` argument. +## + `LaxMode` and `GnuMode`: `-10` is consumed as the value of `-c` +## (allowing negative number values). +## +## **Long Options** +## +## Consider `--foo:bar`: +## +## - `Nim`: `:` is a valid delimiter, so `bar` is the value of `--foo`. +## - `LaxMode`: same as `Nim`. +## - `Gnu`: only `=` is a delimiter, so this parses as an option named +## `foo:bar` without a value (unless `longNoVal` is non-empty and allows +## next-argument consumption). +## +## Consider `--foo =bar`: +## +## - `Nim`: whitespace around delimiters is allowed, so `=bar` is the +## value of `--foo`. +## - `LaxMode`: same as `Nim`. +## - `Gnu`: whitespace around `=` is not allowed, so `--foo` is an +## option without a value, and `=bar` is parsed as an argument. +## +## Custom Rule Sets +## ================ +## +## .. 
Warning:: Custom rule sets are unsupported and not tested +## +## If you require parsing rules beyond the three provided modes, it's possible +## to define a custom parser behavior by specifying a set of individual parser +## rules. +## +## Due to this feature being unsupported, it requires importing the private +## symbols of the module (with `import std/parseopt {.all.}`) and utilizing +## the unexported `initOptParser` overload, which accepts `set[ParserRules]` +## (see the `ParserRules` enum in the code for details). ## ## See also ## ======== @@ -171,13 +320,42 @@ ## parser ## * `parsexml module`_ for a XML / HTML parser ## * `other parsers`_ for more parsers +## * POSIX_ - The Open Group Base Specifications Issue 8. Utility Conventions +## * GNU_ - GNU C Library reference manual. 26.1.1 Program Argument Syntax Conventions +## +## .. _GNU: https://sourceware.org/glibc/manual/latest/html_node/Argument-Syntax.html +## .. _POSIX: https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap12.html {.push debugger: off.} include "system/inclrtl" -import std/strutils import std/os +when defined(nimscript): + from std/strutils import toLowerAscii, endsWith + +type + CliMode* = enum + ## Parser behavior profiles used to control parser behavior. + ## See `Parser Modes<#parser-modes>`_ for details + LaxMode, ## The most forgiving mode + NimMode, ## Nim parsing rules (default) + GnuMode ## GNU-style parsing + +type + ParserRules = enum + ## Feature flags used to assemble parser behavior for a given mode. 
+ prSepAllowDelimBefore, ## Allow whitespace before an opt-val separator + prSepAllowDelimAfter, ## Allow whitespace after an opt-val separator + prShortAllowSep, ## Allow separator form for short options: `-k:val`, `-k=val` + prShortBundle, ## Allow bundling short options behind one '-' + prShortValAllowAdjacent, ## Allow adjacent short option values: `-kval` + prShortValAllowNextArg, ## Allow next-argv short option values: `-k val` + prShortValAllowDashLeading, ## Allow values that start with '-' to be taken + prLongAllowSep, ## Allow separator form for long options: `--opt:val`, `--opt=val` + prLongValAllowNextArg, ## Allow `--opt val` form, requires non-empty `longNoVal` + prSepAllowColon, ## Allow `:` as an opt-val separator + prSepAllowEq, ## Allow `=` as an opt-val separator type CmdLineKind* = enum ## The detected command line token. @@ -189,21 +367,49 @@ type ## Implementation of the command line parser. ## ## To initialize it, use the - ## `initOptParser proc<#initOptParser,string,set[char],seq[string]>`_. + ## `initOptParser proc<#initOptParser,string,set[char],seq[string],CliMode>`_. 
pos: int inShortState: bool - allowWhitespaceAfterColon: bool shortNoVal: set[char] longNoVal: seq[string] cmds: seq[string] idx: int + separators: set[char] ## Allowed separators for long/short option values + rules: set[ParserRules] kind*: CmdLineKind ## The detected command line token key*, val*: string ## Key and value pair; the key is the option ## or the argument, and the value is not "" if ## the option was given a value +const DelimSet = {'\t', ' '} ## Allowed delimiters between tokens + +func toRules(m: CliMode): set[ParserRules] = + ## Default rule sets for the given mode `m` + let + Common = { + prSepAllowEq, + prShortValAllowAdjacent, + prShortBundle, + prLongValAllowNextArg, + prLongAllowSep, + } + Lax = { + prSepAllowColon, + prSepAllowDelimBefore, + prSepAllowDelimAfter, + prShortAllowSep, + } + ShortPosix = { + prShortValAllowNextArg, + prShortValAllowDashLeading, + } + case m + of LaxMode: Common + Lax + ShortPosix + of NimMode: Common + Lax + of GnuMode: Common + ShortPosix + proc parseWord(s: string, i: int, w: var string, - delim: set[char] = {'\t', ' '}): int = + delim: set[char] = DelimSet): int = result = i if result < s.len and s[result] == '\"': inc(result) @@ -218,34 +424,23 @@ proc parseWord(s: string, i: int, w: var string, add(w, s[result]) inc(result) -proc initOptParser*(cmdline: seq[string], shortNoVal: set[char] = {}, - longNoVal: seq[string] = @[]; - allowWhitespaceAfterColon = true): OptParser = - ## Initializes the command line parser. - ## - ## If `cmdline.len == 0`, the real command line as provided by the - ## `os` module is retrieved instead if it is available. If the - ## command line is not available, a `ValueError` will be raised. - ## Behavior of the other parameters remains the same as in - ## `initOptParser(string, ...) - ## <#initOptParser,string,set[char],seq[string]>`_. 
- ## - ## See also: - ## * `getopt iterator<#getopt.i,seq[string],set[char],seq[string]>`_ - runnableExamples: - var p = initOptParser() - p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"]) - p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"], - shortNoVal = {'l'}, longNoVal = @["left"]) - result = OptParser(pos: 0, idx: 0, inShortState: false, - shortNoVal: shortNoVal, longNoVal: longNoVal, - allowWhitespaceAfterColon: allowWhitespaceAfterColon +proc initOptParser(cmdline: openArray[string]; + shortNoVal: set[char]; + longNoVal: seq[string]; + rules: set[ParserRules]): OptParser = + result = OptParser(pos: 0, idx: 0, + cmds: @cmdline, + inShortState: false, + shortNoVal: shortNoVal, + longNoVal: longNoVal, + separators: {}, + rules: rules, + kind: cmdEnd, + key: "", val: "", ) - if cmdline.len != 0: - result.cmds = newSeq[string](cmdline.len) - for i in 0..`_ for more information on - ## how this affects parsing. + ## - `cmdline`: Sequence of command line arguments to parse. If empty, the + ## real command line as provided by the `os` module is retrieved instead. + ## If the command line is not available, an assertion will be raised. + ## - `shortNoVal`: Set of short option characters that do not accept values. + ## See `shortNoVal and longNoVal<#nimshortnoval-and-nimlongnoval>`_ for details. + ## - `longNoVal`: Sequence of long option names that do not accept values. + ## See `shortNoVal and longNoVal<#nimshortnoval-and-nimlongnoval>`_ for details. + ## - `mode`: Parser behavior profile (`NimMode`, `LaxMode`, or `GnuMode`). + ## See `parser modes<#parser-modes>`_ for details. ## - ## This does not provide a way of passing default values to arguments. 
+ ## See also: + ## * `getopt iterator<#getopt.i,seq[string],set[char],seq[string],CliMode>`_ + runnableExamples: + var p = initOptParser() + p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"]) + p = initOptParser(@["--left", "--debug:3", "-l", "-r:2"], + shortNoVal = {'l'}, longNoVal = @["left"]) + initOptParser(cmdline, shortNoVal, longNoVal, toRules(mode)) + +proc initOptParser*(cmdline: seq[string], + shortNoVal: set[char] = {}, + longNoVal: seq[string] = @[]; + allowWhitespaceAfterColon: bool): OptParser {.deprecated: + "`allowWhitespaceAfterColon` is deprecated, use parser modes instead".} = + ## This is an overload for continued support of the legacy `allowWhitespaceAfterColon` + ## option. It modifies the default parser mode so that the passed value is respected. + ## + ## Current default parser mode behaves as if `true` was passed (old default) + ## + ## - `allowWhitespaceAfterColon`: When `true`, allows forms like + ## `--option: value` or `--option= value` where the value is in the next + ## token after the delimiter. When `false`, the value must be in the same + ## token as the delimiter. + var nimrules = toRules(NimMode) + if allowWhitespaceAfterColon == false: nimrules.excl prSepAllowDelimAfter + initOptParser(cmdline, shortNoVal, longNoVal, nimrules) + +proc initOptParser*(cmdline = ""; + shortNoVal: set[char] = {}; + longNoVal: seq[string] = @[]; + mode: CliMode = NimMode): OptParser = + ## Initializes the command line parser from a command line string. + ## + ## The `cmdline` string is parsed into tokens using shell-like quoting rules. + ## + ## **Parameters:** + ## + ## - `cmdline`: Command line string to parse. If empty, the real command line + ## as provided by the `os` module is retrieved instead. If the command line + ## is not available, an assertion will be raised. + ## - `shortNoVal`: Set of short option characters that do not accept values. + ## See `shortNoVal and longNoVal<#nimshortnoval-and-nimlongnoval>`_ for details. 
+ ## - `longNoVal`: Sequence of long option names that do not accept values. + ## See `shortNoVal and longNoVal<#nimshortnoval-and-nimlongnoval>`_ for details. + ## - `mode`: Parser behavior profile (`NimMode`, `LaxMode`, or `GnuMode`). + ## See `parser modes<#parser-modes>`_ for details. + ## + ## **Note:** This does not provide a way of passing default values to arguments. ## ## See also: ## * `getopt iterator<#getopt.i,OptParser>`_ @@ -293,34 +536,81 @@ proc initOptParser*(cmdline = "", shortNoVal: set[char] = {}, p = initOptParser("--left --debug:3 -l -r:2") p = initOptParser("--left --debug:3 -l -r:2", shortNoVal = {'l'}, longNoVal = @["left"]) + initOptParser(parseCmdLine(cmdline), shortNoVal, longNoVal, toRules(mode)) - initOptParser(parseCmdLine(cmdline), shortNoVal, longNoVal, allowWhitespaceAfterColon) +proc initOptParser*(cmdline = ""; + shortNoVal: set[char] = {}; + longNoVal: seq[string] = @[]; + allowWhitespaceAfterColon: bool): OptParser {.deprecated: + "`allowWhitespaceAfterColon` is deprecated, use parser modes instead".} = + ## This is an overload for continued support of the legacy `allowWhitespaceAfterColon` + ## option. It modifies the default parser mode so that the passed value is respected. + ## + ## Current default parser mode behaves as if `true` was passed (old default). + ## + ## - `allowWhitespaceAfterColon`: When `true`, allows forms like + ## `--option: value` or `--option= value` where the value is in the next + ## token after the delimiter. When `false`, the value must be in the same + ## token as the delimiter. 
+ var nimrules = toRules(NimMode) + if allowWhitespaceAfterColon == false: nimrules.excl prSepAllowDelimAfter + initOptParser(parseCmdLine(cmdline), shortNoVal, longNoVal, nimrules) proc handleShortOption(p: var OptParser; cmd: string) = var i = p.pos p.kind = cmdShortOption - if i < cmd.len: + if i < cmd.len: # multidigit short option support goes here add(p.key, cmd[i]) inc(i) p.inShortState = true - while i < cmd.len and cmd[i] in {'\t', ' '}: - inc(i) - p.inShortState = false - if i < cmd.len and (cmd[i] in {':', '='} or - card(p.shortNoVal) > 0 and p.key[0] notin p.shortNoVal): - if i < cmd.len and cmd[i] in {':', '='}: + if prSepAllowDelimBefore in p.rules: + while i < cmd.len and cmd[i] in DelimSet: inc(i) + p.inShortState = false + + proc consumeDelims() = + while i < cmd.len and cmd[i] in DelimSet: inc(i) + + proc advance(p: var OptParser; n = 1)= p.inShortState = false - while i < cmd.len and cmd[i] in {'\t', ' '}: inc(i) + p.pos = 0 + inc p.idx, n + + template next(): untyped = p.cmds[p.idx + 1] + + let canTakeVal = card(p.shortNoVal) > 0 and p.key[0] notin p.shortNoVal + if i < cmd.len and cmd[i] in p.separators: + # separator case + if prShortAllowSep in p.rules: + # allow separators: skip the separator and take the value after it + inc(i) + if prSepAllowDelimAfter in p.rules: + consumeDelims() + # prohibit separators: treat separator + remainder as the value + # this represents an error state but produces output that can be validated p.val = substr(cmd, i) - p.pos = 0 - inc p.idx - else: - p.pos = i + p.advance(1) + return + elif canTakeVal and prShortValAllowAdjacent in p.rules and i < cmd.len: + # adjacent value + if prSepAllowDelimBefore in p.rules: + consumeDelims() + p.val = substr(cmd, i) + p.advance(1) + return + elif canTakeVal and + prShortValAllowNextArg in p.rules and + i >= cmd.len and + p.idx + 1 < p.cmds.len and ( + prShortValAllowDashLeading in p.rules or + not (next().len > 0 and next()[0] == '-')): + # next-argument value + p.val = 
next() + p.advance(2) + return + p.pos = i if i >= cmd.len: - p.inShortState = false - p.pos = 0 - inc p.idx + p.advance(1) proc next*(p: var OptParser) {.rtl, extern: "npo$1".} = ## Parses the next token. @@ -343,54 +633,71 @@ proc next*(p: var OptParser) {.rtl, extern: "npo$1".} = return var i = p.pos - while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i) + template cmd(): untyped = p.cmds[p.idx] + template nextArg(): untyped = p.cmds[p.idx + 1] + + proc consumeDelims(cmds: openArray[string]; idx: int) = + while i < cmds[idx].len and cmds[idx][i] in DelimSet: inc(i) + + proc advance(p: var OptParser; n = 1) = + p.pos = 0 + inc p.idx, n + + consumeDelims(p.cmds, p.idx) p.pos = i setLen(p.key, 0) setLen(p.val, 0) if p.inShortState: p.inShortState = false - if i >= p.cmds[p.idx].len: - inc(p.idx) - p.pos = 0 + if i < cmd.len: + handleShortOption(p, p.cmds[p.idx]) + return + else: + p.advance(1) if p.idx >= p.cmds.len: p.kind = cmdEnd return - else: - handleShortOption(p, p.cmds[p.idx]) - return - if i < p.cmds[p.idx].len and p.cmds[p.idx][i] == '-': + if i < cmd.len and cmd[i] == '-': inc(i) - if i < p.cmds[p.idx].len and p.cmds[p.idx][i] == '-': + if i < cmd.len and cmd[i] == '-': p.kind = cmdLongOption inc(i) - i = parseWord(p.cmds[p.idx], i, p.key, {' ', '\t', ':', '='}) - while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i) - if i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {':', '='}: + i = parseWord(cmd, i, p.key, + DelimSet + (if prLongAllowSep in p.rules: p.separators else: {})) + if prSepAllowDelimBefore in p.rules: + consumeDelims(p.cmds, p.idx) + if prLongAllowSep in p.rules and i < cmd.len and cmd[i] in p.separators: inc(i) - while i < p.cmds[p.idx].len and p.cmds[p.idx][i] in {'\t', ' '}: inc(i) - # if we're at the end, use the next command line option: - if i >= p.cmds[p.idx].len and p.idx < p.cmds.len and - p.allowWhitespaceAfterColon: - inc p.idx - i = 0 - if p.idx < p.cmds.len: - p.val = p.cmds[p.idx].substr(i) 
- elif len(p.longNoVal) > 0 and p.key notin p.longNoVal and p.idx+1 < p.cmds.len: - p.val = p.cmds[p.idx+1] - inc p.idx + if prSepAllowDelimAfter in p.rules: + consumeDelims(p.cmds, p.idx) + if i >= cmd.len and p.idx + 1 < p.cmds.len and + prSepAllowDelimAfter in p.rules: + p.val = nextArg() + p.advance(2) + else: + p.val = cmd.substr(i) + p.advance(1) + elif prLongValAllowNextArg in p.rules and + len(p.longNoVal) > 0 and + p.key notin p.longNoVal and + p.idx + 1 < p.cmds.len: + p.val = nextArg() + p.advance(2) else: - p.val = "" - inc p.idx - p.pos = 0 + if i < cmd.len: + # Leave remainder of the current token to be parsed as an argument. + consumeDelims(p.cmds, p.idx) + p.cmds[p.idx] = cmd.substr(i) + else: + p.advance(1) else: p.pos = i - handleShortOption(p, p.cmds[p.idx]) + handleShortOption(p, cmd) else: p.kind = cmdArgument - p.key = p.cmds[p.idx] - inc p.idx - p.pos = 0 + p.key = cmd + p.advance(1) when declared(quoteShellCommand): proc cmdLineRest*(p: OptParser): string {.rtl, extern: "npo$1".} = @@ -469,8 +776,10 @@ iterator getopt*(p: var OptParser): tuple[kind: CmdLineKind, key, if p.kind == cmdEnd: break yield (p.kind, p.key, p.val) -iterator getopt*(cmdline: seq[string] = @[], - shortNoVal: set[char] = {}, longNoVal: seq[string] = @[]): +iterator getopt*(cmdline: seq[string] = @[]; + shortNoVal: set[char] = {}; + longNoVal: seq[string] = @[]; + mode: CliMode = NimMode): tuple[kind: CmdLineKind, key, val: string] = ## Convenience iterator for iterating over command line arguments. ## @@ -483,6 +792,9 @@ iterator getopt*(cmdline: seq[string] = @[], ## parameters<#nimshortnoval-and-nimlongnoval>`_ for more information on ## how this affects parsing. ## + ## `mode` selects the parser behavior profile (`NimMode`, `LaxMode`, + ## or `GnuMode`). See `parser modes<#parser-modes>`_ for details. + ## ## There is no need to check for `cmdEnd` while iterating. If using `getopt` ## with case switching, checking for `cmdEnd` is required. 
## @@ -513,7 +825,8 @@ iterator getopt*(cmdline: seq[string] = @[], ## writeHelp() ## ``` var p = initOptParser(cmdline, shortNoVal = shortNoVal, - longNoVal = longNoVal) + longNoVal = longNoVal, + rules = toRules(mode)) while true: next(p) if p.kind == cmdEnd: break diff --git a/tests/misc/tparseoptmodes.nim b/tests/misc/tparseoptmodes.nim new file mode 100644 index 0000000000..1412c0caf2 --- /dev/null +++ b/tests/misc/tparseoptmodes.nim @@ -0,0 +1,508 @@ +discard """ + action: run +""" + +import parseopt +from std/sequtils import toSeq + +type Opt = tuple[kind: CmdLineKind, key, val: string] +proc `$`(opt: Opt): string = "(" & $opt[0] & ", \"" & opt[1] & "\", \"" & opt[2] & "\")" + +proc collect(args: seq[string] | string; + shortNoVal: set[char] = {}; + longNoVal: seq[string] = @[]): seq[(CliMode, seq[Opt])] = + for mode in CliMode: + var p = parseopt.initOptParser(args, + shortNoVal = shortNoVal, longNoVal = longNoVal, mode = mode) + let res = toSeq(parseopt.getopt(p)) + result.add (mode, res) + +proc check(name: string; + results: openArray[(CliMode, seq[Opt])]; + expected: proc(m: CliMode): seq[Opt]) = + for (mode, res) in results: + doAssert res == expected(mode), "[" & $mode & "]: " & name & ":\n" & $res + +block: + # prShortValAllowNextArg: separate option-argument for mandatory opt-arg. + let res = collect(@["-c", "4"], shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "c", "4")] + of NimMode: @[(cmdShortOption, "c", ""), (cmdArgument, "4", "")] + of GnuMode: @[(cmdShortOption, "c", "4")] + check("short whitespace value", res, expected) + +block: + # No opt-arg knowledge: whitespace does not bind to short option. + let res = collect(@["-c", "4"]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdShortOption, "c", ""), (cmdArgument, "4", "")] + check("short no-val whitespace value", res, expected) + +block: + # prShortBundle + prShortValAllowNextArg: grouped shorts with one opt-arg.
+ let res = collect(@["-abc", "4"], shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "a", ""), + (cmdShortOption, "b", ""), + (cmdShortOption, "c", "4")] + + of NimMode: @[(cmdShortOption, "a", ""), + (cmdShortOption, "b", ""), + (cmdShortOption, "c", ""), + (cmdArgument, "4", "")] + + of GnuMode: @[(cmdShortOption, "a", ""), + (cmdShortOption, "b", ""), + (cmdShortOption, "c", "4")] + check("short bundle with trailing value", res, expected) + +block: + # prShortValAllowAdjacent: option+argument in same token (dash-led value). + let res = collect(@["-c-x"], shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + @[(cmdShortOption, "c", "-x")] + check("short adjacent dash-led", res, expected) + +block: + # prShortBundle + prShortValAllowAdjacent (dash-led value). + let res = collect(@["-abc-10"], shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + @[(cmdShortOption, "a", ""), + (cmdShortOption, "b", ""), + (cmdShortOption, "c", "-10")] + check("short bundle with adjacent negative", res, expected) + +block: + # prShortValAllowNextArg: option and option-argument can be separate args. + let res = collect(@["-c", ":"], shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "c", ":")] + of NimMode: @[(cmdShortOption, "c", ""), (cmdArgument, ":", "")] + of GnuMode: @[(cmdShortOption, "c", ":")] + check("short whitespace colon value", res, expected) + +block: + # prShortValAllowAdjacent: combined option+argument without blanks. + let res = collect(@["-abc4"], shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + @[(cmdShortOption, "a", ""), + (cmdShortOption, "b", ""), + (cmdShortOption, "c", "4")] + check("short bundle adjacent value", res, expected) + +block: + # prShortBundle: bundle of no-arg shorts should split into options.
+ let res = collect(@["-ab"], shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + @[(cmdShortOption, "a", ""), (cmdShortOption, "b", "")] + check("short bundle no-arg", res, expected) + +block: + # prShortBundle + prShortValAllowNextArg: a no-arg short followed by one with arg. + let res = collect(@["-ac", "4"], shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode : @[(cmdShortOption, "a", ""), + (cmdShortOption, "c", "4")] + of NimMode: @[(cmdShortOption, "a", ""), + (cmdShortOption, "c", ""), + (cmdArgument, "4", "")] + of GnuMode: @[(cmdShortOption, "a", ""), + (cmdShortOption, "c", "4")] + check("short bundle trailing value", res, expected) + +block: + # prShortValAllowNextArg + cmdline parsing: whitespace-separated opt-arg. + let res = collect("-c \"foo bar\"", shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "c", "foo bar")] + of NimMode: @[(cmdShortOption, "c", ""), (cmdArgument, "foo bar", "")] + of GnuMode: @[(cmdShortOption, "c", "foo bar")] + check("short whitespace quoted value", res, expected) + +block: + # prShortValAllowNextArg + prShortValAllowDashLeading: negative numbers as opt-args. + let res = collect(@["-n", "-10"], shortNoVal = {'a', 'b', 'c'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "n", "-10")] + of NimMode: @[(cmdShortOption, "n", ""), (cmdShortOption, "1", "0")] + of GnuMode: @[(cmdShortOption, "n", "-10")] + check("short negative value, shortNoVal used", res, expected) + +block: + # No opt-arg knowledge (empty shortNoVal): "-10" is not bound as a value; it is parsed as bundled short options.
+ let res = collect(@["-n", "-10"]) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "n", ""), + (cmdShortOption, "1", ""), + (cmdShortOption, "0", "")] + of NimMode: @[(cmdShortOption, "n", ""), + (cmdShortOption, "1", ""), + (cmdShortOption, "0", "")] + of GnuMode: @[(cmdShortOption, "n", ""), + (cmdShortOption, "1", ""), + (cmdShortOption, "0", "")] + check("short negative value, shortNoVal empty", res, expected) + +block: + # prShortValAllowNextArg: repeated option-argument pairs are interpreted in order. + let res = collect(@["-c", "1", "-c", "2"], shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "c", "1"), + (cmdShortOption, "c", "2")] + of NimMode: @[(cmdShortOption, "c", ""), + (cmdArgument, "1", ""), + (cmdShortOption, "c", ""), + (cmdArgument, "2", "")] + of GnuMode: @[(cmdShortOption, "c", "1"), + (cmdShortOption, "c", "2")] + check("short repeat whitespace values", res, expected) + +block: + # prShortValAllowAdjacent: adjacent opt-args preserve order for repeats. + let res = collect(@["-c1", "-c2"], shortNoVal = {'a', 'b'}) + proc expected(m: CliMode): seq[Opt] = + @[(cmdShortOption, "c", "1"), (cmdShortOption, "c", "2")] + check("short repeat adjacent values", res, expected) + +block: + # prShortValAllowDashLeading: value starting with '-' is consumed as opt-arg. + # Divergence from POSIX Guideline 14 when enabled.
+ let res = collect(@["-c", "-a"], shortNoVal = {'b'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "c", "-a")] + of NimMode: @[(cmdShortOption, "c", ""), (cmdShortOption, "a", "")] + of GnuMode: @[(cmdShortOption, "c", "-a")] + check("short dash-led value", res, expected) + +block: + # Separator overrides shortNoVal where prShortAllowSep is enabled; GnuMode keeps '=' in the value. + let res = collect(@["-a=foo"], shortNoVal = {'a'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "a", "foo")] + of NimMode: @[(cmdShortOption, "a", "foo")] + of GnuMode: @[(cmdShortOption, "a", "=foo")] + check("separator suppresses shortNoVal", res, expected) + +block: + let res = collect(@["-a=foo"], shortNoVal = {'v'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "a", "foo")] + of NimMode: @[(cmdShortOption, "a", "foo")] + of GnuMode: @[(cmdShortOption, "a", "=foo")] + check("adjacent value-taking vs chort option bundling 1", res, expected) + +block: + # prLongAllowSep, mixed long/short parsing. + # Option-arguments may include ':'/'=' chars.
+ let args = @[ + "foo bar", + "--path:/i like space/projects", + "--aa:bar=a", + "--a=c:d", + "--ab", + "-c", + "--a[baz]:doo" + ] + let res = collect(args, shortNoVal = {'c'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[ + (cmdArgument, "foo bar", ""), + (cmdLongOption, "path", "/i like space/projects"), + (cmdLongOption, "aa", "bar=a"), + (cmdLongOption, "a", "c:d"), + (cmdLongOption, "ab", ""), + (cmdShortOption, "c", ""), + (cmdLongOption, "a[baz]", "doo")] + of NimMode: @[ + (cmdArgument, "foo bar", ""), + (cmdLongOption, "path", "/i like space/projects"), + (cmdLongOption, "aa", "bar=a"), + (cmdLongOption, "a", "c:d"), + (cmdLongOption, "ab", ""), + (cmdShortOption, "c", ""), + (cmdLongOption, "a[baz]", "doo")] + of GnuMode: @[ + (cmdArgument, "foo bar", ""), + (cmdLongOption, "path:/i", ""), # longNoVal is empty so can't take arg here + (cmdArgument, "like space/projects", ""), + (cmdLongOption, "aa:bar", "a"), + (cmdLongOption, "a", "c:d"), + (cmdLongOption, "ab", ""), + (cmdShortOption, "c", ""), + (cmdLongOption, "a[baz]:doo", "")] + check("mixed long/short argv tokens", res, expected) + + +block: + # prLongAllowSep + separators: long option separator handling. + let res = collect(@["--foo:bar"]) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdLongOption, "foo", "bar")] + of NimMode: @[(cmdLongOption, "foo", "bar")] + of GnuMode: @[(cmdLongOption, "foo:bar", "")] + check("long option colon separator", res, expected) + +block: + # prLongAllowSep + prSepAllowDelimAfter: whitespace after the separator is skipped when the rule is enabled.
+ let res = collect(@["--foo= bar"]) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdLongOption, "foo", "bar")] + of NimMode: @[(cmdLongOption, "foo", "bar")] + of GnuMode: @[(cmdLongOption, "foo", " bar")] + check("long option whitespace around separators", res, expected) + +block: + let res = collect(@["--foo =bar"]) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdLongOption, "foo", "bar")] + of NimMode: @[(cmdLongOption, "foo", "bar")] + of GnuMode: @[(cmdLongOption, "foo", ""), (cmdArgument, "=bar", "")] + check("long option whitespace around separators", res, expected) + +block: + let res = collect("--foo =bar", longNoVal = @[""]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "foo", "=bar")] + check("long option argument delimited with whitespace, val allowed", res, expected) + +block: + let res = collect("--foo =bar") + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "foo", ""), (cmdArgument, "=bar", "")] + check("long option argument delimited with whitespace, val not allowed", res, expected) + +block: + # prLongAllowSep: '=' separator + let res = collect(@["--foo=bar"]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "foo", "bar")] + check("long option equals separator", res, expected) + +block: + # prLongValAllowNextArg: long option value can be next argument. + let res = collect(@["--foo", "bar"], longNoVal = @[""]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "foo", "bar")] + check("long option next-arg value", res, expected) + +block: + # longNoVal disables next-arg value consumption. + let res = collect(@["--foo", "bar"], longNoVal = @["foo"]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "foo", ""), (cmdArgument, "bar", "")] + check("long option longNoVal disables argument taking", res, expected) + +block: + # "--" is parsed as a long option with an empty key.
+ let res = collect(@["--", "rest"]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "", ""), (cmdArgument, "rest", "")] + check("double-dash marker", res, expected) + +block: + # option values beginning with ':' - doubled up + let res = collect(@["--foo::"]) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdLongOption, "foo", ":")] + of NimMode: @[(cmdLongOption, "foo", ":")] + of GnuMode: @[(cmdLongOption, "foo::", "")] + check("long option value starting with colon (doubled)", res, expected) + +block: + # option values beginning with '=' - doubled up + let res = collect(@["--foo=="]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "foo", "=")] + check("long option value starting with equals (doubled)", res, expected) + +block: + # option values beginning with ':' - alternated with '=' + let res = collect(@["--foo=:"]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "foo", ":")] + check("long option value starting with colon (alternated)", res, expected) + +block: + # option values beginning with '=' - alternated with ':' + let res = collect(@["--foo:="]) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdLongOption, "foo", "=")] + of NimMode: @[(cmdLongOption, "foo", "=")] + of GnuMode: @[(cmdLongOption, "foo:", "")] + check("long option value starting with equals (alternated)", res, expected) + +block issue9619: + let res = collect(@["--option=", "", "--anotherOption", "tree"]) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdLongOption, "option", ""), + (cmdLongOption, "anotherOption", ""), + (cmdArgument, "tree", "")] + of NimMode: @[(cmdLongOption, "option", ""), + (cmdLongOption, "anotherOption", ""), + (cmdArgument, "tree", "")] + of GnuMode: @[(cmdLongOption, "option", ""), + (cmdArgument, "", ""), + (cmdLongOption, "anotherOption", ""), + (cmdArgument, "tree", "")] + check("issue #9619, whitespace after separator", res, expected) + + +block issue22736: + let res 
= collect(@["--long", "", "-h", "--long:", "-h", "--long=", "-h", "arg"]) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdLongOption, "long", ""), + (cmdArgument, "", ""), + (cmdShortOption, "h", ""), + (cmdLongOption, "long", "-h"), + (cmdLongOption, "long", "-h"), + (cmdArgument, "arg", "")] + of NimMode: @[(cmdLongOption, "long", ""), + (cmdArgument, "", ""), + (cmdShortOption, "h", ""), + (cmdLongOption, "long", "-h"), + (cmdLongOption, "long", "-h"), + (cmdArgument, "arg", "")] + of GnuMode: @[(cmdLongOption, "long", ""), + (cmdArgument, "", ""), + (cmdShortOption, "h", ""), + (cmdLongOption, "long:", ""), + (cmdShortOption, "h", ""), + (cmdLongOption, "long", ""), + (cmdShortOption, "h", ""), + (cmdArgument, "arg", "")] + check("issue #22736, whitespace after separator, colon separator", res, expected) + +# Numbers ===================================================================== + +block: + # Positive integer adjacent to option + let res = collect("-n42", shortNoVal = {'v'}) + proc expected(m: CliMode): seq[Opt] = + @[(cmdShortOption, "n", "42")] + check("numerical option: positive integer adjacent", res, expected) + +block: + # Positive integer adjacent to no-val option + let res = collect("-n42x", shortNoVal = {'n'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "n", ""), (cmdShortOption, "4", "2x")] + of NimMode: @[(cmdShortOption, "n", ""), (cmdShortOption, "4", "2x")] + of GnuMode: @[(cmdShortOption, "n", ""), (cmdShortOption, "4", "2x")] + check("numerical no-val option: positive integer adjacent", res, expected) + +block: + # Negative integer adjacent to option + let res = collect("-n-42", shortNoVal = {'v'}) + proc expected(m: CliMode): seq[Opt] = + @[(cmdShortOption, "n", "-42")] + check("numerical option: negative integer adjacent", res, expected) + +block: + # Floating point number as value + let res = collect(@["-n", "3.14"], shortNoVal = {'v'}) + proc expected(m: CliMode): seq[Opt] = + 
case m + of LaxMode: @[(cmdShortOption, "n", "3.14")] + of NimMode: @[(cmdShortOption, "n", ""), (cmdArgument, "3.14", "")] + of GnuMode: @[(cmdShortOption, "n", "3.14")] + check("numerical option: floating point whitespace", res, expected) + +block: + # Floating point adjacent to option + let res = collect(@["-n3.14"], shortNoVal = {'v'}) + proc expected(m: CliMode): seq[Opt] = + @[(cmdShortOption, "n", "3.14")] + check("numerical option: floating point adjacent", res, expected) + +block: + # Negative floating point + let res = collect(@["-n", "-3.14"], shortNoVal = {'v'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "n", "-3.14")] + of NimMode: @[(cmdShortOption, "n", ""), (cmdShortOption, "3", ".14")] + of GnuMode: @[(cmdShortOption, "n", "-3.14")] + check("numerical option: negative floating point whitespace", res, expected) + +block: + # Negative floating point adjacent + let res = collect("-n-3.14", shortNoVal = {'v'}) + proc expected(m: CliMode): seq[Opt] = + @[(cmdShortOption, "n", "-3.14")] + check("numerical option: negative floating point adjacent", res, expected) + +block: + # Large number + let res = collect(@["-n", "414"], shortNoVal = {'v'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "n", "414")] + of NimMode: @[(cmdShortOption, "n", ""), (cmdArgument, "414", "")] + of GnuMode: @[(cmdShortOption, "n", "414")] + check("numerical option: large number", res, expected) + + +block: + # Multiple numerical options + let res = collect("-n 10 -m20 -k= 30 -40", shortNoVal = {'v'}) + proc expected(m: CliMode): seq[Opt] = + case m + of LaxMode: @[(cmdShortOption, "n", "10"), + (cmdShortOption, "m", "20"), + (cmdShortOption, "k", ""), # buggy but preserved + (cmdArgument, "30", ""), + (cmdShortOption, "4", "0")] + of NimMode: @[(cmdShortOption, "n", ""), + (cmdArgument, "10", ""), + (cmdShortOption, "m", "20"), + (cmdShortOption, "k", ""), # buggy but preserved + (cmdArgument, "30", ""), 
+ (cmdShortOption, "4", "0")] + of GnuMode: @[(cmdShortOption, "n", "10"), + (cmdShortOption, "m", "20"), + (cmdShortOption, "k", "="), + (cmdArgument, "30", ""), + (cmdShortOption, "4", "0")] + check("numerical option: multiple options", res, expected) + +block: + # Long option with numerical value + let res = collect(@["--count=42"], longNoVal = @[]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "count", "42")] + check("numerical option: long option with equals", res, expected) + +block: + # Long option with numerical value (whitespace) + let res = collect(@["--count", "42"], longNoVal = @[""]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "count", "42")] + check("numerical option: long option with whitespace", res, expected) + +block: + # Long option with negative numerical value + let res = collect(@["--offset=-10"], longNoVal = @[]) + proc expected(m: CliMode): seq[Opt] = + @[(cmdLongOption, "offset", "-10")] + check("numerical option: long option negative", res, expected)