diff --git a/changelog.md b/changelog.md index 53e0c0d476..aa0485975e 100644 --- a/changelog.md +++ b/changelog.md @@ -66,12 +66,17 @@ errors. Modes include `Nim` (default, fully compatible) and two new experimental modes: `Lax` and `Gnu` for different option parsing behaviors. +- `std/nre2` is added to replace the deprecated NRE. + [//]: # "Changes:" - `std/math` The `^` symbol now supports floating-point as exponent in addition to the Natural type. - `min`, `max`, and `sequtils`' `minIndex`, `maxIndex` and `minmax` for `openArray`s now accept a comparison function. - `system.substr` implementation now uses `copymem` (wrapped C `memcpy`) for copying data, if available at compilation. - `system.newStringUninit` is now considered free of side-effects allowing it to be used with `--experimental:strictFuncs`. +- `std/re` and `std/nre` are deprecated because the PCRE library is obsolete. + Use https://github.com/nitely/nim-regex or `std/nre2`. + See: https://github.com/nim-lang/Nim/issues/23668. ## Language changes diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim index 8c712c4a6c..adc2ceb22d 100644 --- a/lib/impure/nre.nim +++ b/lib/impure/nre.nim @@ -9,6 +9,11 @@ when defined(js): {.error: "This library needs to be compiled with a c-like backend, and depends on PCRE; See jsre for JS backend.".} +## .. warning:: NRE is deprecated. +## Use [Regex](https://github.com/nitely/nim-regex) or +## `NRE2 <nre2.html>`_ that wraps Regex so that you can easily replace NRE. +## The PCRE library is now at end of life. +## ## What is NRE? ## ============ ## @@ -84,7 +89,7 @@ type Regex* = ref RegexDesc ## Represents the pattern that things are matched against, constructed with ## `re(string)`. Examples: `re"foo"`, `re(r"(*ANYCRLF)(?x)foo # - ## comment".` + ## comment")` ## ## `pattern: string` ## : the string that was used to create the pattern. For details on how @@ -154,7 +159,7 @@ type ## will need to pass these as separate flags to PCRE. 
RegexMatch* = object - ## Usually seen as Option[RegexMatch], it represents the result of an + ## Usually seen as `Option[RegexMatch]`, it represents the result of an ## execution. On failure, it is none, on success, it is some. ## ## `pattern: Regex` diff --git a/lib/impure/re.nim b/lib/impure/re.nim index b39135779b..72d01b9527 100644 --- a/lib/impure/re.nim +++ b/lib/impure/re.nim @@ -10,6 +10,10 @@ when defined(js): {.error: "This library needs to be compiled with a c-like backend, and depends on PCRE; See jsre for JS backend.".} +## .. warning:: This module is deprecated. +## Use [Regex](https://github.com/nitely/nim-regex). +## PCRE library is now at end of life. +## ## Regular expression support for Nim. ## ## This module is implemented by providing a wrapper around the diff --git a/lib/std/nre2.nim b/lib/std/nre2.nim new file mode 100644 index 0000000000..60ff977c60 --- /dev/null +++ b/lib/std/nre2.nim @@ -0,0 +1,344 @@ +# +# Nim's Runtime Library +# (c) Copyright 2026 Nim Contributors +# +# See the file "copying.txt", included in this +# distribution, for details about the copyright. +# + +## What is NRE2? +## ============= +## +## A regular expression library for Nim to replace deprecated NRE. +## It is implemented with `Regex`_ , +## that is pure Nim regex engine and guarantees linear time matching. +## It supports compiling regex and matching at compile-time and +## works with JS backend. +## +## NRE2 is mostly compatible with NRE and the syntax of regular expression is similar to PCRE. +## But it lacks a few features and how to set options in a pattern is different. +## +## The syntax of regular expression is explained in https://nitely.github.io/nim-regex/regex.html +runnableExamples: + import std/sugar + let vowels = re"[aeoui]" + let bounds = collect: + for match in "moiga".findIter(vowels): match.matchBounds + assert bounds == @[1 .. 1, 2 .. 2, 4 .. 
4] + from std/sequtils import toSeq + let s = sequtils.toSeq("moiga".findIter(vowels)) + # fully qualified to avoid confusion with nre.toSeq + assert s.len == 3 + + let firstVowel = "foo".find(vowels) + let hasVowel = firstVowel.isSome() + assert hasVowel + let matchBounds = firstVowel.get().captureBounds[-1] + assert matchBounds.a == 1 + + # as with module `re`, unless specified otherwise, `start` parameter in each + # proc indicates where the scan starts, but outputs are relative to the start + # of the input string, not to `start`: + assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab" + assert find("uxabc", re"ab", start = 3).isNone + +import std/[options, tables] +import regex, regex/nfatype + +export options +export regex.RegexFlags, regex.RegexError + +type + Regex* = regex.Regex2 + ## Represents the pattern that things are matched against, constructed with + ## `re(string)`. Examples: `re"foo"`, `re(r"(?x)foo #comment")` + ## + ## `captureCount: int` + ## : the number of captures that the pattern has. + ## + ## `captureNameId: Table[string, int]` + ## : a table from the capture names to their numeric id. + ## + ## The syntax of regular expression of Regex is explained in https://nitely.github.io/nim-regex/regex.html + + RegexMatch* = object + ## Usually seen as `Option[RegexMatch]`, it represents the result of an + ## execution. On failure, it is none, on success, it is some. + ## + ## `str: string` + ## : the string that was matched against + ## + ## `captures[]: string` + ## : the string value of whatever was captured at that id. If the value + ## is invalid, then behavior is undefined. If the id is `-1`, then + ## the whole match is returned. If the given capture was not matched, + ## `nil` is returned. See examples for `match`. + ## + ## `captureBounds[]: HSlice[int, int]` + ## : gets the bounds of the given capture according to the same rules as + ## the above. If the capture is not filled, then `None` is returned. 
+ ## The bounds are both inclusive. See examples for `match`. + ## + ## `match: string` + ## : the full text of the match. + ## + ## `matchBounds: HSlice[int, int]` + ## : the bounds of the match, as in `captureBounds[]` + ## + ## `(captureBounds|captures).toTable` + ## : returns a table with each named capture as a key. + ## + ## `(captureBounds|captures).toSeq` + ## : returns all the captures by their number. + ## + ## `$: string` + ## : same as `match` + str*: string ## The string that was matched against. + matchImpl: regex.RegexMatch2 + + Captures* {.borrow: `.`.} = distinct RegexMatch + CaptureBounds* {.borrow: `.`.} = distinct RegexMatch + +func captureCount*(pattern: Regex): int {.inline.} = + pattern.toRegex().groupsCount + +func captureNameId*(pattern: Regex): Table[string, int] = + result = initTable[string, int](pattern.toRegex().namedGroups.len) + for k, v in pattern.toRegex().namedGroups: + result[k] = v + +func captureBounds*(match: RegexMatch): CaptureBounds {.inline.} = + CaptureBounds(match) + +func captures*(match: RegexMatch): Captures {.inline.} = + Captures(match) + +func contains*(match: Captures or CaptureBounds, i: int): bool {.inline.} = + i >= -1 and i < match.matchImpl.groupsCount and match.matchImpl.group(i) != reNonCapture + +func len*(match: Captures or CaptureBounds): int {.inline.} = + ## Return the number of capturing groups + match.matchImpl.groupsCount + +func `[]`*(match: CaptureBounds; i: int): HSlice[int, int] {.inline.} = + if i == -1: match.matchImpl.boundaries else: match.matchImpl.group(i) + +func `[]`*(match: CaptureBounds; name: string): HSlice[int, int] {.inline.} = + result = match.matchImpl.group(name) + if result == reNonCapture: + raise newException(KeyError, "Group '" & name & "' was not captured") + +func `[]`*(match: Captures; i: int): string {.inline.} = + match.str[CaptureBounds(match)[i]] + +func `[]`*(match: Captures, name: string): string {.inline.} = + match.str[CaptureBounds(match)[name]] + +func 
match*(match: RegexMatch): string {.inline.} = + match.str[match.matchImpl.boundaries] + +func matchBounds*(match: RegexMatch): HSlice[int, int] {.inline.} = + match.matchImpl.boundaries + +func contains*(match: CaptureBounds or Captures, name: string): bool {.inline.} = + name in match.matchImpl.namedGroups and + match.matchImpl.group(name) != reNonCapture + +func toTable*(match: Captures): Table[string, string] = + result = initTable[string, string]() + for k, i in match.matchImpl.namedGroups: + let r = match.matchImpl.group(i) + if r != reNonCapture: + result[k] = match.str[r] + +func toTable*(match: CaptureBounds): Table[string, HSlice[int, int]] = + result = initTable[string, HSlice[int, int]]() + for k, i in match.matchImpl.namedGroups: + let r = match.matchImpl.group(i) + if r != reNonCapture: + result[k] = match.matchImpl.group(i) + +iterator items*(match: CaptureBounds; default = none(HSlice[int, int])): Option[HSlice[int, int]] = + for i in 0 ..< match.len: + yield if i in match: some(match[i]) else: default + +iterator items*(match: Captures; default = none(string)): Option[string] = + for i in 0 ..< match.len: + yield if i in match: some(match[i]) else: default + +func toSeq*(match: CaptureBounds; + default = none(HSlice[int, int])): seq[Option[HSlice[int, int]]] = + result = @[] + for it in match.items(default): result.add it + +func toSeq*(match: Captures; + default: Option[string] = none(string)): seq[Option[string]] = + result = @[] + for it in match.items(default): result.add it + +func `$`*(match: RegexMatch): string = + match.match + +func re*(pattern: static string; flags: static RegexFlags = {}): static[Regex2] = + ## Parse and compile a regular expression at compile-time + result = regex.re2(pattern, flags) + +func re*(pattern: string; flags: RegexFlags = {}): Regex = + ## Parse and compile a regular expression at run-time + result = regex.re2(pattern, flags) + +func match*(str: string, pattern: Regex, start = 0, endpos = int.high): 
Option[RegexMatch] = + ## Like `find(...)<#find,string,Regex,int>`_, but anchored to the start of the + ## string. + runnableExamples: + assert "foo".match(re"f").isSome + assert "foo".match(re"o").isNone + + assert "abc".match(re"(\w)").get.captures[0] == "a" + assert "abc".match(re"(?P\w)").get.captures["letter"] == "a" + assert "abc".match(re"(\w)\w").get.captures[-1] == "ab" + + assert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0 + assert 0 in "abc".match(re"(\w)").get.captureBounds + assert "abc".match(re"").get.captureBounds[-1] == 0 .. -1 + assert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2 + var mat = default(RegexMatch) + let r = regex.startsWith(str.toOpenArray(0, min(str.high, endpos)), pattern, mat.matchImpl, start) + if r: + mat.str = str + some(mat) + else: + none(RegexMatch) + +iterator findIter*(str: string; pattern: Regex; start = 0, endpos = int.high): RegexMatch = + ## Works the same as `find(...)<#find,string,Regex,int>`_, but finds every + ## non-overlapping match: + runnableExamples: + import std/sugar + assert collect(for a in "2222".findIter(re"22"): a.match) == @["22", "22"] + # not @["22", "22", "22"] + ## Arguments are the same as `find(...)<#find,string,Regex,int>`_ + ## + ## Variants: + ## + ## - `proc findAll(...)` returns a `seq[string]` + var mat = RegexMatch(str: str) + # TODO: + # needs following PR to remove `substr` call. + # https://github.com/nitely/nim-regex/pull/162 + for m in regex.findAll(str.substr(start, endpos), pattern): + mat.matchImpl = m + yield mat + +proc find*(str: string; pattern: Regex; start = 0; endpos = int.high): Option[RegexMatch] = + ## Finds the given pattern in the string between the end and start + ## positions. + ## + ## `start` + ## : The start point at which to start matching. `|abc` is `0`; + ## `a|bc` is `1` + ## + ## `endpos` + ## : The maximum index for a match; `int.high` means the end of the + ## string, otherwise it’s an inclusive upper bound. 
+ var mat = default(RegexMatch) + let r = regex.find(str.substr(start, endpos), pattern, mat.matchImpl) + + # remove following code after regex.find get `start`/`last` parameter + for v in mat.matchImpl.captures.mitems: + v.a += start + v.b += start + mat.matchImpl.boundaries.a += start + mat.matchImpl.boundaries.b += start + + if r: + mat.str = str + some(mat) + else: + none(RegexMatch) + +proc findAll*(str: string; pattern: Regex; start = 0; endpos = int.high): seq[string] = + result = @[] + for match in str.findIter(pattern, start, endpos): + result.add(match.match) + +proc contains*(str: string; pattern: Regex; start = 0; endpos = int.high): bool = + ## Determine if the string contains the given pattern between the end and + ## start positions: + ## This function is equivalent to `isSome(str.find(pattern, start, endpos))`. + runnableExamples: + assert "abc".contains(re"bc") + assert not "abc".contains(re"cd") + assert not "abc".contains(re"a", start = 1) + + isSome(str.find(pattern, start, endpos)) + +proc split*(str: string; pattern: Regex; maxSplit = -1; start = 0): seq[string] = + ## Splits the string with the given regex. This works according to the + ## rules that Perl and Javascript use. + ## + ## `start` behaves the same as in `find(...)<#find,string,Regex,int>`_. + ## + runnableExamples: + # - If the match is zero-width, then the string is still split: + assert "123".split(re"") == @["1", "2", "3"] + + # - If the pattern has a capture in it, it is added after the string + # split: + assert "12".split(re"(\d)") == @["", "1", "", "2", ""] + + # - If `maxsplit != -1`, then the string will only be split + # `maxsplit - 1` times. This means that there will be `maxsplit` + # strings in the output seq. 
+ assert "1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"] + + result = splitIncl(str, pattern, maxSplit, start) + +proc replace*(str: string; pattern: Regex; + subproc: proc (match: RegexMatch): string): string = + ## Replaces each match of Regex in the string with `subproc`, which should + ## never be or return `nil`. + ## + ## If `subproc` is a `proc (RegexMatch): string`, then it is executed with + ## each match and the return value is the replacement value. + ## + ## If `subproc` is a `proc (string): string`, then it is executed with the + ## full text of the match and the return value is the replacement value. + ## + ## If `subproc` is a string, the syntax is as follows: + ## + ## - `$$` - literal `$` + ## - `$123` - capture number `123` + ## - `$1$#` - first and second captures + ## - `$#` - first capture + ## + ## Following syntax is not supported in NRE2 + ## + ## - `$foo` - named capture `foo` + ## - `${foo}` - same as above + ## - `$0` - full match + ## + ## If a given capture is missing, `ValueError` is thrown. + proc by(m: RegexMatch2, s: string): string = + let mat = RegexMatch(str: s, matchImpl: m) + result = subproc(mat) + + result = regex.replace(str, pattern, by) + +proc replace*(str: string; pattern: Regex; + subproc: proc (match: string): string): string = + proc by(m: RegexMatch2; s: string): string = + result = subproc(s) + + result = regex.replace(str, pattern, by) + +proc replace*(str: string; pattern: Regex; sub: string): string = + result = regex.replace(str, pattern, sub) + +func escapeRe*(str: string): string = + ## Escapes the string so it doesn't match any special characters. 
+ runnableExamples: + assert escapeRe("fly+wind") == "fly\\+wind" + assert escapeRe("nim*") == "nim\\*" + + result = regex.escapeRe(str) diff --git a/lib/std/nre2.nims b/lib/std/nre2.nims new file mode 100644 index 0000000000..1286aaa33a --- /dev/null +++ b/lib/std/nre2.nims @@ -0,0 +1,14 @@ +import std/os + +if getCommand() == "doc": + # std/nre2 requires nim-regex and it requires nim-unicodedb. + # when build documentation on CI, git clone them as nimble is not available + + const PkgDir = "build/deps" + const Pkgs = ["nim-regex", "nim-unicodedb"] + + for n in Pkgs: + if not dirExists(PkgDir / n): + exec("git clone -q https://github.com/nitely/" & n & " " & (PkgDir / n)) + + switch("path", "$nim" / PkgDir / n / "src") diff --git a/tests/stdlib/tnre2.nim b/tests/stdlib/tnre2.nim new file mode 100644 index 0000000000..6cea0f8114 --- /dev/null +++ b/tests/stdlib/tnre2.nim @@ -0,0 +1,196 @@ +import std/[assertions, options, sequtils, strutils, tables] +import std/nre2 + +block: + let pattern = "[0-9" + doAssertRaises(RegexError): discard re(pattern) + +block: # captures + block: # capture bounds are correct + let ex1 = re("([0-9])") + doAssert "1 23".find(ex1).get.matchBounds == 0 .. 0 + doAssert "1 23".find(ex1).get.captureBounds[0] == 0 .. 0 + doAssert "1 23".find(ex1, 1).get.matchBounds == 2 .. 2 + doAssert "1 23".find(ex1, 3).get.matchBounds == 3 .. 3 + + let ex2 = re("()()()()()()()()()()([0-9])") + doAssert "824".find(ex2).get.captureBounds[0] == 0 .. -1 + doAssert "824".find(ex2).get.captureBounds[10] == 0 .. 0 + + let ex3 = re("([0-9]+)") + doAssert "824".find(ex3).get.captureBounds[0] == 0 .. 
2 + + block: # named captures + let ex1 = "foobar".find(re("(?Pfoo)(?Pbar)")) + doAssert ex1.get.captures["foo"] == "foo" + doAssert ex1.get.captures["bar"] == "bar" + + let ex2 = "foo".find(re("(?Pfoo)(?Pbar)?")) + doAssert "foo" in ex2.get.captureBounds + doAssert ex2.get.captures["foo"] == "foo" + doAssert not ("bar" in ex2.get.captures) + doAssertRaises(KeyError): + discard ex2.get.captures["bar"] + + block: # named capture bounds + let ex1 = "foo".find(re("(?Pfoo)(?Pbar)?")) + doAssert "foo" in ex1.get.captureBounds + doAssert ex1.get.captureBounds["foo"] == 0..2 + doAssert not ("bar" in ex1.get.captures) + doAssertRaises(KeyError): + discard ex1.get.captureBounds["bar"] + + block: # capture count + let ex1 = re("(?Pfoo)(?Pbar)?") + doAssert ex1.captureCount == 2 + doAssert ex1.captureNameId == {"foo" : 0, "bar" : 1}.toTable() + + block: # named capture table + let ex1 = "foo".find(re("(?Pfoo)(?Pbar)?")) + doAssert ex1.get.captures.toTable == {"foo" : "foo"}.toTable() + doAssert ex1.get.captureBounds.toTable == {"foo" : 0..2}.toTable() + + let ex2 = "foobar".find(re("(?Pfoo)(?Pbar)?")) + doAssert ex2.get.captures.toTable == {"foo" : "foo", "bar" : "bar"}.toTable() + + block: # capture sequence + let ex1 = "foo".find(re("(?Pfoo)(?Pbar)?")) + doAssert ex1.get.captures.toSeq == @[some("foo"), none(string)] + doAssert ex1.get.captureBounds.toSeq == @[some(0..2), none(Slice[int])] + doAssert ex1.get.captures.toSeq(some("")) == @[some("foo"), some("")] + + let ex2 = "foobar".find(re("(?Pfoo)(?Pbar)?")) + doAssert ex2.get.captures.toSeq == @[some("foo"), some("bar")] + +block: # match + block: # upper bound must be inclusive + doAssert "abc".match(re"abc", endpos = -1) == none(RegexMatch) + doAssert "abc".match(re"abc", endpos = 1) == none(RegexMatch) + doAssert "abc".match(re"abc", endpos = 2) != none(RegexMatch) + + block: # match examples + doAssert "abc".match(re"(\w)").get.captures[0] == "a" + doAssert "abc".match(re"(?P\w)").get.captures["letter"] == "a" + 
doAssert "abc".match(re"(\w)\w").get.captures[-1] == "ab" + doAssert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0 + doAssert "abc".match(re"").get.captureBounds[-1] == 0 .. -1 + doAssert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2 + + let cap1 = "abc".match(re"(\w)(\w)+").get.captures + doAssert cap1.len == 2 + doAssert 0 in cap1 + doAssert 1 in cap1 + doAssert cap1[0] == "a" and cap1[1] == "c" + doAssert 0 in "abc".match(re"(\w)+").get.captureBounds + + block: # match test cases + doAssert "123".match(re"").get.matchBounds == 0 .. -1 + let mat1 = "123".match(re"123").get + doAssert mat1.matchBounds == 0 .. 2 + doAssert mat1.match == "123" + +block: # find + block: # find text + doAssert "3213a".find(re"[a-z]").get.match == "a" + doAssert sequtils.toSeq(findIter("1 2 3 4 5 6 7 8 ", re" ")).mapIt( + it.match + ) == @[" ", " ", " ", " ", " ", " ", " ", " "] + + block: # find bounds + doAssert sequtils.toSeq(findIter("1 2 3 4 5 ", re" ")).mapIt( + it.matchBounds + ) == @[1..1, 3..3, 5..5, 7..7, 9..9] + + block: # overlapping find + doAssert "222".findAll(re"22") == @["22"] + doAssert "2222".findAll(re"22") == @["22", "22"] + + block: # len 0 find + doAssert "".findAll(re"\ ") == newSeq[string]() + doAssert "".findAll(re"") == @[""] + doAssert "abc".findAll(re"") == @["", "", "", ""] + doAssert "word word".findAll(re"\b") == @["", "", "", ""] + doAssert "word\r\lword".findAll(re"(?m)$") == @["", ""] + doAssert "слово слово".findAll(re"\b") == @["", "", "", ""] + +block: # contains + doAssert "abc".contains(re"bc") + doAssert not "abc".contains(re"cd") + doAssert not "abc".contains(re"a", start = 1) + +block: # string splitting + block: # splitting strings + doAssert "1 2 3 4 5 6 ".split(re" ") == @["1", "2", "3", "4", "5", "6", ""] + doAssert "1 2 ".split(re(" ")) == @["1", "", "2", "", ""] + doAssert "1 2".split(re(" ")) == @["1", "2"] + doAssert "foo".split(re("foo")) == @["", ""] + doAssert "".split(re"foo") == @[""] + doAssert "9".split(re"\son\s") 
== @["9"] + + block: # captured patterns + doAssert "12".split(re"(\d)") == @["", "1", "", "2", ""] + + block: # maxsplit + doAssert "123".split(re"", maxsplit = 2) == @["1", "23"] + doAssert "123".split(re"", maxsplit = 1) == @["123"] + doAssert "123".split(re"", maxsplit = -1) == @["1", "2", "3"] + doAssert "1 2 3".split(re" ", maxsplit = 1) == @["1 2 3"] + doAssert "1 2 3".split(re" ", maxsplit = 2) == @["1", "2 3"] + doAssert "1 2 3".split(re"( )", maxsplit = 2) == @["1", " ", "2 3"] + + block: # split with 0-length match + doAssert "12345".split(re("")) == @["1", "2", "3", "4", "5"] + doAssert "".split(re"") == newSeq[string]() + doAssert "word word".split(re"\b") == @["word", " ", "word"] + #doAssert "word\r\lword".split(re"(?m)$") == @["word", "\r\lword"] + doAssert "слово слово".split(re"(\b)") == @["слово", "", " ", "", "слово", ""] + + block: # perl split tests + doAssert "forty-two" .split(re"") .join(",") == "f,o,r,t,y,-,t,w,o" + doAssert "forty-two" .split(re"", 3) .join(",") == "f,o,rty-two" + doAssert "split this string" .split(re" ") .join(",") == "split,this,string" + doAssert "split this string" .split(re" ", 2) .join(",") == "split,this string" + doAssert "try$this$string" .split(re"\$") .join(",") == "try,this,string" + doAssert "try$this$string" .split(re"\$", 2) .join(",") == "try,this$string" + doAssert "comma, separated, values" .split(re", ") .join("|") == "comma|separated|values" + doAssert "comma, separated, values" .split(re", ", 2) .join("|") == "comma|separated, values" + doAssert "Perl6::Camelia::Test" .split(re"::") .join(",") == "Perl6,Camelia,Test" + doAssert "Perl6::Camelia::Test" .split(re"::", 2) .join(",") == "Perl6,Camelia::Test" + doAssert "split,me,please" .split(re",") .join("|") == "split|me|please" + doAssert "split,me,please" .split(re",", 2) .join("|") == "split|me,please" + doAssert "Hello World Goodbye Mars".split(re"\s+") .join(",") == "Hello,World,Goodbye,Mars" + doAssert "Hello World Goodbye Mars".split(re"\s+", 
3).join(",") == "Hello,World,Goodbye Mars" + doAssert "Hello test" .split(re"(\s+)") .join(",") == "Hello, ,test" + doAssert "this will be split" .split(re" ") .join(",") == "this,will,be,split" + doAssert "this will be split" .split(re" ", 3) .join(",") == "this,will,be split" + doAssert "a.b" .split(re"\.") .join(",") == "a,b" + doAssert "" .split(re"") .len == 0 + doAssert ":" .split(re"") .len == 1 + + block: # start position + doAssert "abc".split(re"", start = 1) == @["b", "c"] + doAssert "abc".split(re"", start = 2) == @["c"] + doAssert "abc".split(re"", start = 3) == newSeq[string]() + doAssert "abc".split(re"^b", start = 1) == @["bc"] + +block: # replace + block: # replace with 0-length strings + doAssert "".replace(re"1", proc (v: RegexMatch): string = "1") == "" + doAssert " ".replace(re"", proc (v: RegexMatch): string = "1") == "1 1" + doAssert "".replace(re"", proc (v: RegexMatch): string = "1") == "1" + + block: # regular replace + doAssert "123".replace(re"\d", "foo") == "foofoofoo" + doAssert "123".replace(re"(\d)", "$1$1") == "112233" + doAssert "123".replace(re"(\d)(\d)", "$1$2") == "123" + doAssert "123".replace(re"(\d)(\d)", "$#$#") == "123" + doAssert "abcdefghijklm".replace(re"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)", "$12") == "l" + + block: # replacing missing captures should throw instead of segfaulting + doAssertRaises(ValueError): discard "ab".replace(re"(a)", "$1$2") + +block: # escape strings + block: # escape strings + doAssert "123".escapeRe() == "123" + doAssert "[]".escapeRe() == r"\[\]" + doAssert "()".escapeRe() == r"\(\)" diff --git a/tests/stdlib/tnre2.nims b/tests/stdlib/tnre2.nims new file mode 100644 index 0000000000..ea30b440ce --- /dev/null +++ b/tests/stdlib/tnre2.nims @@ -0,0 +1,3 @@ +# std/nre2 requires nim-regex and it requires nim-unicodedb +exec("nimble --nimbleDir:build/deps install unicodedb@#head") +exec("nimble --nimbleDir:build/deps install regex@#head")