mirror of
https://github.com/nim-lang/Nim.git
synced 2026-05-25 06:18:16 +00:00
std/nre2 is implemented using https://github.com/nitely/nim-regex. std/nre2 has almost the same features as std/nre, but some regular expressions supported by std/nre are not supported. The regular expression syntax of Nim Regex is explained at: https://nitely.github.io/nim-regex/regex.html
This commit is contained in:
@@ -66,12 +66,17 @@ errors.
|
||||
Modes include `Nim` (default, fully compatible) and two new experimental modes:
|
||||
`Lax` and `Gnu` for different option parsing behaviors.
|
||||
|
||||
- `std/nre2` is added to replace deprecated NRE.
|
||||
|
||||
[//]: # "Changes:"
|
||||
|
||||
- `std/math` The `^` symbol now supports floating-point as exponent in addition to the Natural type.
|
||||
- `min`, `max`, and `sequtils`' `minIndex`, `maxIndex` and `minmax` for `openArray`s now accept a comparison function.
|
||||
- `system.substr` implementation now uses `copymem` (wrapped C `memcpy`) for copying data, if available at compilation.
|
||||
- `system.newStringUninit` is now considered free of side-effects allowing it to be used with `--experimental:strictFuncs`.
|
||||
- `std/re` and `std/nre` are deprecated as PCRE library is obsolete.
|
||||
Use https://github.com/nitely/nim-regex or `std/nre2`.
|
||||
See: https://github.com/nim-lang/Nim/issues/23668.
|
||||
|
||||
## Language changes
|
||||
|
||||
|
||||
@@ -9,6 +9,11 @@
|
||||
when defined(js):
|
||||
{.error: "This library needs to be compiled with a c-like backend, and depends on PCRE; See jsre for JS backend.".}
|
||||
|
||||
## .. warning:: NRE is deprecated.
|
||||
## Use [Regex](https://github.com/nitely/nim-regex) or
|
||||
## `NRE2 <nre2.html>`_ that wraps Regex so that you can easily replace NRE.
|
||||
## PCRE library is now at end of life.
|
||||
##
|
||||
## What is NRE?
|
||||
## ============
|
||||
##
|
||||
@@ -84,7 +89,7 @@ type
|
||||
Regex* = ref RegexDesc
|
||||
## Represents the pattern that things are matched against, constructed with
|
||||
## `re(string)`. Examples: `re"foo"`, `re(r"(*ANYCRLF)(?x)foo #
|
||||
## comment".`
|
||||
## comment")`
|
||||
##
|
||||
## `pattern: string`
|
||||
## : the string that was used to create the pattern. For details on how
|
||||
@@ -154,7 +159,7 @@ type
|
||||
## will need to pass these as separate flags to PCRE.
|
||||
|
||||
RegexMatch* = object
|
||||
## Usually seen as Option[RegexMatch], it represents the result of an
|
||||
## Usually seen as `Option[RegexMatch]`, it represents the result of an
|
||||
## execution. On failure, it is none, on success, it is some.
|
||||
##
|
||||
## `pattern: Regex`
|
||||
|
||||
@@ -10,6 +10,10 @@
|
||||
when defined(js):
|
||||
{.error: "This library needs to be compiled with a c-like backend, and depends on PCRE; See jsre for JS backend.".}
|
||||
|
||||
## .. warning:: This module is deprecated.
|
||||
## Use [Regex](https://github.com/nitely/nim-regex).
|
||||
## PCRE library is now at end of life.
|
||||
##
|
||||
## Regular expression support for Nim.
|
||||
##
|
||||
## This module is implemented by providing a wrapper around the
|
||||
|
||||
344
lib/std/nre2.nim
Normal file
344
lib/std/nre2.nim
Normal file
@@ -0,0 +1,344 @@
|
||||
#
|
||||
# Nim's Runtime Library
|
||||
# (c) Copyright 2026 Nim Contributors
|
||||
#
|
||||
# See the file "copying.txt", included in this
|
||||
# distribution, for details about the copyright.
|
||||
#
|
||||
|
||||
## What is NRE2?
|
||||
## =============
|
||||
##
|
||||
## A regular expression library for Nim to replace deprecated NRE.
|
||||
## It is implemented with `Regex <https://github.com/nitely/nim-regex>`_,
|
||||
## a pure Nim regex engine that guarantees linear time matching.
|
||||
## It supports compiling regex and matching at compile-time and
|
||||
## works with JS backend.
|
||||
##
|
||||
## NRE2 is mostly compatible with NRE and the syntax of regular expression is similar to PCRE.
|
||||
## However, it lacks a few features, and options are set differently inside a pattern.
|
||||
##
|
||||
## The syntax of regular expression is explained in https://nitely.github.io/nim-regex/regex.html
|
||||
runnableExamples:
|
||||
import std/sugar
|
||||
let vowels = re"[aeoui]"
|
||||
let bounds = collect:
|
||||
for match in "moiga".findIter(vowels): match.matchBounds
|
||||
assert bounds == @[1 .. 1, 2 .. 2, 4 .. 4]
|
||||
from std/sequtils import toSeq
|
||||
let s = sequtils.toSeq("moiga".findIter(vowels))
|
||||
# fully qualified to avoid confusion with nre2.toSeq
|
||||
assert s.len == 3
|
||||
|
||||
let firstVowel = "foo".find(vowels)
|
||||
let hasVowel = firstVowel.isSome()
|
||||
assert hasVowel
|
||||
let matchBounds = firstVowel.get().captureBounds[-1]
|
||||
assert matchBounds.a == 1
|
||||
|
||||
# as with module `re`, unless specified otherwise, `start` parameter in each
|
||||
# proc indicates where the scan starts, but outputs are relative to the start
|
||||
# of the input string, not to `start`:
|
||||
assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
|
||||
assert find("uxabc", re"ab", start = 3).isNone
|
||||
|
||||
import std/[options, tables]
|
||||
import regex, regex/nfatype
|
||||
|
||||
export options
|
||||
export regex.RegexFlags, regex.RegexError
|
||||
|
||||
type
|
||||
Regex* = regex.Regex2
|
||||
## Represents the pattern that things are matched against, constructed with
|
||||
## `re(string)`. Examples: `re"foo"`, `re(r"(?x)foo #comment")`
|
||||
##
|
||||
## `captureCount: int`
|
||||
## : the number of captures that the pattern has.
|
||||
##
|
||||
## `captureNameId: Table[string, int]`
|
||||
## : a table from the capture names to their numeric id.
|
||||
##
|
||||
## The syntax of regular expression of Regex is explained in https://nitely.github.io/nim-regex/regex.html
|
||||
|
||||
RegexMatch* = object
|
||||
## Usually seen as `Option[RegexMatch]`, it represents the result of an
|
||||
## execution. On failure, it is none, on success, it is some.
|
||||
##
|
||||
## `str: string`
|
||||
## : the string that was matched against
|
||||
##
|
||||
## `captures[]: string`
|
||||
## : the string value of whatever was captured at that id. If the value
|
||||
## is invalid, then behavior is undefined. If the id is `-1`, then
|
||||
## the whole match is returned. If the given capture was not matched,
|
||||
## do not rely on the result; test with `i in captures` first (a missing named capture raises `KeyError`). See examples for `match`.
|
||||
##
|
||||
## `captureBounds[]: HSlice[int, int]`
|
||||
## : gets the bounds of the given capture according to the same rules as
|
||||
## the above. If the capture is not filled, a numeric index yields a sentinel slice (test with `in` first) and a name raises `KeyError`.
|
||||
## The bounds are both inclusive. See examples for `match`.
|
||||
##
|
||||
## `match: string`
|
||||
## : the full text of the match.
|
||||
##
|
||||
## `matchBounds: HSlice[int, int]`
|
||||
## : the bounds of the match, as in `captureBounds[]`
|
||||
##
|
||||
## `(captureBounds|captures).toTable`
|
||||
## : returns a table with each named capture as a key.
|
||||
##
|
||||
## `(captureBounds|captures).toSeq`
|
||||
## : returns all the captures by their number.
|
||||
##
|
||||
## `$: string`
|
||||
## : same as `match`
|
||||
str*: string ## The string that was matched against.
|
||||
matchImpl: regex.RegexMatch2
|
||||
|
||||
Captures* {.borrow: `.`.} = distinct RegexMatch
|
||||
CaptureBounds* {.borrow: `.`.} = distinct RegexMatch
|
||||
|
||||
func captureCount*(pattern: Regex): int {.inline.} =
  ## Returns the number of capture groups declared in `pattern`.
  let compiled = pattern.toRegex()
  compiled.groupsCount
|
||||
|
||||
func captureNameId*(pattern: Regex): Table[string, int] =
  ## Builds a table that maps every named group of `pattern` to its numeric id.
  let named = pattern.toRegex().namedGroups
  result = initTable[string, int](named.len)
  for name, id in named.pairs:
    result[name] = id
|
||||
|
||||
func captureBounds*(match: RegexMatch): CaptureBounds {.inline.} =
  ## Views `match` as a `CaptureBounds`, giving access to the bounds of each
  ## capture group.
  result = CaptureBounds(match)
|
||||
|
||||
func captures*(match: RegexMatch): Captures {.inline.} =
  ## Views `match` as a `Captures`, giving access to the text of each capture
  ## group.
  result = Captures(match)
|
||||
|
||||
func contains*(match: Captures or CaptureBounds, i: int): bool {.inline.} =
  ## Returns whether capture `i` took part in the match.
  ##
  ## `-1` denotes the whole match and is therefore always present. Any other
  ## index must be a valid group id whose group was actually captured.
  if i == -1:
    # The whole match has no slot in the group list, so it must not be
    # looked up with `group`; doing so would index out of bounds.
    true
  else:
    i >= 0 and i < match.matchImpl.groupsCount and
      match.matchImpl.group(i) != reNonCapture
|
||||
|
||||
func len*(match: Captures or CaptureBounds): int {.inline.} =
  ## Number of capturing groups available in `match`.
  result = match.matchImpl.groupsCount
|
||||
|
||||
func `[]`*(match: CaptureBounds; i: int): HSlice[int, int] {.inline.} =
  ## Bounds of capture group `i`; `-1` selects the bounds of the whole match.
  case i
  of -1: match.matchImpl.boundaries
  else: match.matchImpl.group(i)

func `[]`*(match: CaptureBounds; name: string): HSlice[int, int] {.inline.} =
  ## Bounds of the capture group called `name`.
  ##
  ## Raises `KeyError` when the group did not take part in the match.
  let bounds = match.matchImpl.group(name)
  if bounds == reNonCapture:
    raise newException(KeyError, "Group '" & name & "' was not captured")
  bounds
|
||||
|
||||
func `[]`*(match: Captures; i: int): string {.inline.} =
  ## Text of capture group `i`; `-1` selects the text of the whole match.
  let bounds = CaptureBounds(match)[i]
  match.str[bounds]

func `[]`*(match: Captures, name: string): string {.inline.} =
  ## Text of the capture group called `name`.
  let bounds = CaptureBounds(match)[name]
  match.str[bounds]
|
||||
|
||||
func match*(match: RegexMatch): string {.inline.} =
  ## The full text covered by the match.
  let whole = match.matchImpl.boundaries
  match.str[whole]
|
||||
|
||||
func matchBounds*(match: RegexMatch): HSlice[int, int] {.inline.} =
  ## The bounds of the whole match inside `match.str`.
  result = match.matchImpl.boundaries
|
||||
|
||||
func contains*(match: CaptureBounds or Captures, name: string): bool {.inline.} =
  ## Returns whether a group called `name` exists and took part in the match.
  if name in match.matchImpl.namedGroups:
    match.matchImpl.group(name) != reNonCapture
  else:
    false
|
||||
|
||||
func toTable*(match: Captures): Table[string, string] =
  ## Collects the text of every named group that was captured, keyed by the
  ## group name. Groups that did not take part in the match are left out.
  result = initTable[string, string]()
  for name, id in match.matchImpl.namedGroups:
    let bounds = match.matchImpl.group(id)
    if bounds == reNonCapture:
      continue
    result[name] = match.str[bounds]

func toTable*(match: CaptureBounds): Table[string, HSlice[int, int]] =
  ## Collects the bounds of every named group that was captured, keyed by the
  ## group name. Groups that did not take part in the match are left out.
  result = initTable[string, HSlice[int, int]]()
  for name, id in match.matchImpl.namedGroups:
    let bounds = match.matchImpl.group(id)
    if bounds == reNonCapture:
      continue
    result[name] = bounds
|
||||
|
||||
iterator items*(match: CaptureBounds; default = none(HSlice[int, int])): Option[HSlice[int, int]] =
  ## Yields the bounds of each group in order, substituting `default` for
  ## groups that were not captured.
  for idx in 0 ..< match.len:
    if idx in match:
      yield some(match[idx])
    else:
      yield default

iterator items*(match: Captures; default = none(string)): Option[string] =
  ## Yields the text of each group in order, substituting `default` for
  ## groups that were not captured.
  for idx in 0 ..< match.len:
    if idx in match:
      yield some(match[idx])
    else:
      yield default
|
||||
|
||||
func toSeq*(match: CaptureBounds;
            default = none(HSlice[int, int])): seq[Option[HSlice[int, int]]] =
  ## Returns the bounds of all groups by number; groups that were not
  ## captured are represented by `default`.
  result = @[]
  for bounds in items(match, default):
    result.add(bounds)

func toSeq*(match: Captures;
            default: Option[string] = none(string)): seq[Option[string]] =
  ## Returns the text of all groups by number; groups that were not
  ## captured are represented by `default`.
  result = @[]
  for text in items(match, default):
    result.add(text)
|
||||
|
||||
func `$`*(match: RegexMatch): string =
  ## Same as `match.match`: the full text of the match.
  result = match.match
|
||||
|
||||
func re*(pattern: static string; flags: static RegexFlags = {}): static[Regex2] =
  ## Parses and compiles `pattern` at compile-time.
  regex.re2(pattern, flags)

func re*(pattern: string; flags: RegexFlags = {}): Regex =
  ## Parses and compiles `pattern` at run-time.
  regex.re2(pattern, flags)
|
||||
|
||||
func match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
  ## Like `find`, but the match is anchored to the start of the scanned
  ## region. `endpos` is an inclusive upper bound; `int.high` means the end
  ## of the string.
  runnableExamples:
    assert "foo".match(re"f").isSome
    assert "foo".match(re"o").isNone

    assert "abc".match(re"(\w)").get.captures[0] == "a"
    assert "abc".match(re"(?P<letter>\w)").get.captures["letter"] == "a"
    assert "abc".match(re"(\w)\w").get.captures[-1] == "ab"

    assert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
    assert 0 in "abc".match(re"(\w)").get.captureBounds
    assert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
    assert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
  var found = default(RegexMatch)
  # Clamp the inclusive upper bound so the view never runs past the string.
  let lastIdx = min(str.high, endpos)
  if regex.startsWith(str.toOpenArray(0, lastIdx), pattern, found.matchImpl, start):
    found.str = str
    result = some(found)
  else:
    result = none(RegexMatch)
|
||||
|
||||
iterator findIter*(str: string; pattern: Regex; start = 0, endpos = int.high): RegexMatch =
  ## Works like `find`, but yields every non-overlapping match.
  ##
  ## Arguments are the same as for `find`. All bounds in the yielded matches
  ## are relative to the start of `str`, not to `start`.
  ##
  ## Variants:
  ##
  ## - `proc findAll(...)` returns a `seq[string]`
  runnableExamples:
    import std/sugar
    assert collect(for a in "2222".findIter(re"22"): a.match) == @["22", "22"]
    # not @["22", "22", "22"]
  var mat = RegexMatch(str: str)
  # `substr` clamps a negative `start` to 0, so the shift must be clamped too.
  let offset = max(start, 0)
  # TODO:
  # needs following PR to remove `substr` call.
  # https://github.com/nitely/nim-regex/pull/162
  for m in regex.findAll(str.substr(start, endpos), pattern):
    mat.matchImpl = m
    if offset != 0:
      # The search ran on a substring while `mat.str` is the whole string:
      # translate every bound back into coordinates of `str`.
      for v in mat.matchImpl.captures.mitems:
        # Unfilled groups keep the sentinel so `contains` still detects them.
        if v != reNonCapture:
          v.a += offset
          v.b += offset
      mat.matchImpl.boundaries.a += offset
      mat.matchImpl.boundaries.b += offset
    yield mat
|
||||
|
||||
proc find*(str: string; pattern: Regex; start = 0; endpos = int.high): Option[RegexMatch] =
  ## Finds the given pattern in the string between the start and end
  ## positions.
  ##
  ## `start`
  ## : The start point at which to start matching. `|abc` is `0`;
  ##   `a|bc` is `1`
  ##
  ## `endpos`
  ## : The maximum index for a match; `int.high` means the end of the
  ##   string, otherwise it's an inclusive upper bound.
  ##
  ## Returned bounds are relative to the start of `str`, not to `start`.
  result = none(RegexMatch)
  var mat = default(RegexMatch)
  if regex.find(str.substr(start, endpos), pattern, mat.matchImpl):
    # remove following code after regex.find get `start`/`last` parameter
    # `substr` clamps a negative `start` to 0, so the shift must be clamped too.
    let offset = max(start, 0)
    if offset != 0:
      for v in mat.matchImpl.captures.mitems:
        # Unfilled groups must keep the `reNonCapture` sentinel, otherwise
        # `contains`, `toTable` and `toSeq` would treat them as captured.
        if v != reNonCapture:
          v.a += offset
          v.b += offset
      mat.matchImpl.boundaries.a += offset
      mat.matchImpl.boundaries.b += offset
    mat.str = str
    result = some(mat)
|
||||
|
||||
proc findAll*(str: string; pattern: Regex; start = 0; endpos = int.high): seq[string] =
  ## Returns the text of every non-overlapping match produced by `findIter`.
  result = @[]
  for found in findIter(str, pattern, start, endpos):
    result.add(found.match)
|
||||
|
||||
proc contains*(str: string; pattern: Regex; start = 0; endpos = int.high): bool =
  ## Determines whether `str` contains the given pattern between the start
  ## and end positions.
  ## This function is equivalent to `isSome(str.find(pattern, start, endpos))`.
  runnableExamples:
    assert "abc".contains(re"bc")
    assert not "abc".contains(re"cd")
    assert not "abc".contains(re"a", start = 1)

  let found = str.find(pattern, start, endpos)
  found.isSome
|
||||
|
||||
proc split*(str: string; pattern: Regex; maxSplit = -1; start = 0): seq[string] =
  ## Splits the string with the given regex, following the rules that Perl
  ## and Javascript use.
  ##
  ## `start` behaves the same as in `find`.
  runnableExamples:
    # - If the match is zero-width, then the string is still split:
    assert "123".split(re"") == @["1", "2", "3"]

    # - If the pattern has a capture in it, it is added after the string
    #   split:
    assert "12".split(re"(\d)") == @["", "1", "", "2", ""]

    # - If `maxsplit != -1`, then the string will only be split
    #   `maxsplit - 1` times. This means that there will be `maxsplit`
    #   strings in the output seq.
    assert "1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"]

  splitIncl(str, pattern, maxSplit, start)
|
||||
|
||||
proc replace*(str: string; pattern: Regex;
              subproc: proc (match: RegexMatch): string): string =
  ## Replaces each match of `pattern` in `str` with the value produced by
  ## `subproc`, which must not be `nil`.
  ##
  ## If `subproc` is a `proc (RegexMatch): string`, then it is executed with
  ## each match and the return value is the replacement value.
  ##
  ## If `subproc` is a `proc (string): string`, then it is executed with the
  ## full text of the match and the return value is the replacement value.
  ##
  ## If `subproc` is a string, the syntax is as follows:
  ##
  ## - `$$` - literal `$`
  ## - `$123` - capture number `123`
  ## - `$1$#` - first and second captures
  ## - `$#` - first capture
  ##
  ## Following syntax is not supported in NRE2
  ##
  ## - `$foo` - named capture `foo`
  ## - `${foo}` - same as above
  ## - `$0` - full match
  ##
  ## If a given capture is missing, `ValueError` is thrown.
  proc by(m: RegexMatch2, s: string): string =
    # Wrap the engine's match together with the input string so the user
    # callback receives an NRE2 `RegexMatch`.
    subproc(RegexMatch(str: s, matchImpl: m))

  regex.replace(str, pattern, by)
|
||||
|
||||
proc replace*(str: string; pattern: Regex;
              subproc: proc (match: string): string): string =
  ## Replaces each match of `pattern` in `str` with the value returned by
  ## `subproc`, which is called with the full text of that match.
  proc by(m: RegexMatch2; s: string): string =
    # `s` is the whole input string handed over by the regex engine; only
    # the matched slice must be passed on to the user callback.
    result = subproc(s[m.boundaries])

  result = regex.replace(str, pattern, by)
|
||||
|
||||
proc replace*(str: string; pattern: Regex; sub: string): string =
  ## Replaces each match of `pattern` in `str` using the substitution
  ## string `sub`.
  regex.replace(str, pattern, sub)
|
||||
|
||||
func escapeRe*(str: string): string =
  ## Escapes `str` so that none of its characters is treated as a special
  ## regex character.
  runnableExamples:
    assert escapeRe("fly+wind") == "fly\\+wind"
    assert escapeRe("nim*") == "nim\\*"

  regex.escapeRe(str)
|
||||
14
lib/std/nre2.nims
Normal file
14
lib/std/nre2.nims
Normal file
@@ -0,0 +1,14 @@
|
||||
import std/os
|
||||
|
||||
if getCommand() == "doc":
|
||||
# std/nre2 requires nim-regex and it requires nim-unicodedb.
|
||||
# When building documentation on CI, clone them with git because nimble is not available.
|
||||
|
||||
const PkgDir = "build/deps"
|
||||
const Pkgs = ["nim-regex", "nim-unicodedb"]
|
||||
|
||||
for n in Pkgs:
|
||||
if not dirExists(PkgDir / n):
|
||||
exec("git clone -q https://github.com/nitely/" & n & " " & (PkgDir / n))
|
||||
|
||||
switch("path", "$nim" / PkgDir / n / "src")
|
||||
196
tests/stdlib/tnre2.nim
Normal file
196
tests/stdlib/tnre2.nim
Normal file
@@ -0,0 +1,196 @@
|
||||
import std/[assertions, options, sequtils, strutils, tables]
|
||||
import std/nre2
|
||||
|
||||
block:
|
||||
let pattern = "[0-9"
|
||||
doAssertRaises(RegexError): discard re(pattern)
|
||||
|
||||
block: # captures
|
||||
block: # capture bounds are correct
|
||||
let ex1 = re("([0-9])")
|
||||
doAssert "1 23".find(ex1).get.matchBounds == 0 .. 0
|
||||
doAssert "1 23".find(ex1).get.captureBounds[0] == 0 .. 0
|
||||
doAssert "1 23".find(ex1, 1).get.matchBounds == 2 .. 2
|
||||
doAssert "1 23".find(ex1, 3).get.matchBounds == 3 .. 3
|
||||
|
||||
let ex2 = re("()()()()()()()()()()([0-9])")
|
||||
doAssert "824".find(ex2).get.captureBounds[0] == 0 .. -1
|
||||
doAssert "824".find(ex2).get.captureBounds[10] == 0 .. 0
|
||||
|
||||
let ex3 = re("([0-9]+)")
|
||||
doAssert "824".find(ex3).get.captureBounds[0] == 0 .. 2
|
||||
|
||||
block: # named captures
|
||||
let ex1 = "foobar".find(re("(?P<foo>foo)(?P<bar>bar)"))
|
||||
doAssert ex1.get.captures["foo"] == "foo"
|
||||
doAssert ex1.get.captures["bar"] == "bar"
|
||||
|
||||
let ex2 = "foo".find(re("(?P<foo>foo)(?P<bar>bar)?"))
|
||||
doAssert "foo" in ex2.get.captureBounds
|
||||
doAssert ex2.get.captures["foo"] == "foo"
|
||||
doAssert not ("bar" in ex2.get.captures)
|
||||
doAssertRaises(KeyError):
|
||||
discard ex2.get.captures["bar"]
|
||||
|
||||
block: # named capture bounds
|
||||
let ex1 = "foo".find(re("(?P<foo>foo)(?P<bar>bar)?"))
|
||||
doAssert "foo" in ex1.get.captureBounds
|
||||
doAssert ex1.get.captureBounds["foo"] == 0..2
|
||||
doAssert not ("bar" in ex1.get.captures)
|
||||
doAssertRaises(KeyError):
|
||||
discard ex1.get.captureBounds["bar"]
|
||||
|
||||
block: # capture count
|
||||
let ex1 = re("(?P<foo>foo)(?P<bar>bar)?")
|
||||
doAssert ex1.captureCount == 2
|
||||
doAssert ex1.captureNameId == {"foo" : 0, "bar" : 1}.toTable()
|
||||
|
||||
block: # named capture table
|
||||
let ex1 = "foo".find(re("(?P<foo>foo)(?P<bar>bar)?"))
|
||||
doAssert ex1.get.captures.toTable == {"foo" : "foo"}.toTable()
|
||||
doAssert ex1.get.captureBounds.toTable == {"foo" : 0..2}.toTable()
|
||||
|
||||
let ex2 = "foobar".find(re("(?P<foo>foo)(?P<bar>bar)?"))
|
||||
doAssert ex2.get.captures.toTable == {"foo" : "foo", "bar" : "bar"}.toTable()
|
||||
|
||||
block: # capture sequence
|
||||
let ex1 = "foo".find(re("(?P<foo>foo)(?P<bar>bar)?"))
|
||||
doAssert ex1.get.captures.toSeq == @[some("foo"), none(string)]
|
||||
doAssert ex1.get.captureBounds.toSeq == @[some(0..2), none(Slice[int])]
|
||||
doAssert ex1.get.captures.toSeq(some("")) == @[some("foo"), some("")]
|
||||
|
||||
let ex2 = "foobar".find(re("(?P<foo>foo)(?P<bar>bar)?"))
|
||||
doAssert ex2.get.captures.toSeq == @[some("foo"), some("bar")]
|
||||
|
||||
block: # match
|
||||
block: # upper bound must be inclusive
|
||||
doAssert "abc".match(re"abc", endpos = -1) == none(RegexMatch)
|
||||
doAssert "abc".match(re"abc", endpos = 1) == none(RegexMatch)
|
||||
doAssert "abc".match(re"abc", endpos = 2) != none(RegexMatch)
|
||||
|
||||
block: # match examples
|
||||
doAssert "abc".match(re"(\w)").get.captures[0] == "a"
|
||||
doAssert "abc".match(re"(?P<letter>\w)").get.captures["letter"] == "a"
|
||||
doAssert "abc".match(re"(\w)\w").get.captures[-1] == "ab"
|
||||
doAssert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
|
||||
doAssert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
|
||||
doAssert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
|
||||
|
||||
let cap1 = "abc".match(re"(\w)(\w)+").get.captures
|
||||
doAssert cap1.len == 2
|
||||
doAssert 0 in cap1
|
||||
doAssert 1 in cap1
|
||||
doAssert cap1[0] == "a" and cap1[1] == "c"
|
||||
doAssert 0 in "abc".match(re"(\w)+").get.captureBounds
|
||||
|
||||
block: # match test cases
|
||||
doAssert "123".match(re"").get.matchBounds == 0 .. -1
|
||||
let mat1 = "123".match(re"123").get
|
||||
doAssert mat1.matchBounds == 0 .. 2
|
||||
doAssert mat1.match == "123"
|
||||
|
||||
block: # find
|
||||
block: # find text
|
||||
doAssert "3213a".find(re"[a-z]").get.match == "a"
|
||||
doAssert sequtils.toSeq(findIter("1 2 3 4 5 6 7 8 ", re" ")).mapIt(
|
||||
it.match
|
||||
) == @[" ", " ", " ", " ", " ", " ", " ", " "]
|
||||
|
||||
block: # find bounds
|
||||
doAssert sequtils.toSeq(findIter("1 2 3 4 5 ", re" ")).mapIt(
|
||||
it.matchBounds
|
||||
) == @[1..1, 3..3, 5..5, 7..7, 9..9]
|
||||
|
||||
block: # overlapping find
|
||||
doAssert "222".findAll(re"22") == @["22"]
|
||||
doAssert "2222".findAll(re"22") == @["22", "22"]
|
||||
|
||||
block: # len 0 find
|
||||
doAssert "".findAll(re"\ ") == newSeq[string]()
|
||||
doAssert "".findAll(re"") == @[""]
|
||||
doAssert "abc".findAll(re"") == @["", "", "", ""]
|
||||
doAssert "word word".findAll(re"\b") == @["", "", "", ""]
|
||||
doAssert "word\r\lword".findAll(re"(?m)$") == @["", ""]
|
||||
doAssert "слово слово".findAll(re"\b") == @["", "", "", ""]
|
||||
|
||||
block: # contains
|
||||
doAssert "abc".contains(re"bc")
|
||||
doAssert not "abc".contains(re"cd")
|
||||
doAssert not "abc".contains(re"a", start = 1)
|
||||
|
||||
block: # string splitting
|
||||
block: # splitting strings
|
||||
doAssert "1 2 3 4 5 6 ".split(re" ") == @["1", "2", "3", "4", "5", "6", ""]
|
||||
doAssert "1 2 ".split(re(" ")) == @["1", "", "2", "", ""]
|
||||
doAssert "1 2".split(re(" ")) == @["1", "2"]
|
||||
doAssert "foo".split(re("foo")) == @["", ""]
|
||||
doAssert "".split(re"foo") == @[""]
|
||||
doAssert "9".split(re"\son\s") == @["9"]
|
||||
|
||||
block: # captured patterns
|
||||
doAssert "12".split(re"(\d)") == @["", "1", "", "2", ""]
|
||||
|
||||
block: # maxsplit
|
||||
doAssert "123".split(re"", maxsplit = 2) == @["1", "23"]
|
||||
doAssert "123".split(re"", maxsplit = 1) == @["123"]
|
||||
doAssert "123".split(re"", maxsplit = -1) == @["1", "2", "3"]
|
||||
doAssert "1 2 3".split(re" ", maxsplit = 1) == @["1 2 3"]
|
||||
doAssert "1 2 3".split(re" ", maxsplit = 2) == @["1", "2 3"]
|
||||
doAssert "1 2 3".split(re"( )", maxsplit = 2) == @["1", " ", "2 3"]
|
||||
|
||||
block: # split with 0-length match
|
||||
doAssert "12345".split(re("")) == @["1", "2", "3", "4", "5"]
|
||||
doAssert "".split(re"") == newSeq[string]()
|
||||
doAssert "word word".split(re"\b") == @["word", " ", "word"]
|
||||
#doAssert "word\r\lword".split(re"(?m)$") == @["word", "\r\lword"]
|
||||
doAssert "слово слово".split(re"(\b)") == @["слово", "", " ", "", "слово", ""]
|
||||
|
||||
block: # perl split tests
|
||||
doAssert "forty-two" .split(re"") .join(",") == "f,o,r,t,y,-,t,w,o"
|
||||
doAssert "forty-two" .split(re"", 3) .join(",") == "f,o,rty-two"
|
||||
doAssert "split this string" .split(re" ") .join(",") == "split,this,string"
|
||||
doAssert "split this string" .split(re" ", 2) .join(",") == "split,this string"
|
||||
doAssert "try$this$string" .split(re"\$") .join(",") == "try,this,string"
|
||||
doAssert "try$this$string" .split(re"\$", 2) .join(",") == "try,this$string"
|
||||
doAssert "comma, separated, values" .split(re", ") .join("|") == "comma|separated|values"
|
||||
doAssert "comma, separated, values" .split(re", ", 2) .join("|") == "comma|separated, values"
|
||||
doAssert "Perl6::Camelia::Test" .split(re"::") .join(",") == "Perl6,Camelia,Test"
|
||||
doAssert "Perl6::Camelia::Test" .split(re"::", 2) .join(",") == "Perl6,Camelia::Test"
|
||||
doAssert "split,me,please" .split(re",") .join("|") == "split|me|please"
|
||||
doAssert "split,me,please" .split(re",", 2) .join("|") == "split|me,please"
|
||||
doAssert "Hello World Goodbye Mars".split(re"\s+") .join(",") == "Hello,World,Goodbye,Mars"
|
||||
doAssert "Hello World Goodbye Mars".split(re"\s+", 3).join(",") == "Hello,World,Goodbye Mars"
|
||||
doAssert "Hello test" .split(re"(\s+)") .join(",") == "Hello, ,test"
|
||||
doAssert "this will be split" .split(re" ") .join(",") == "this,will,be,split"
|
||||
doAssert "this will be split" .split(re" ", 3) .join(",") == "this,will,be split"
|
||||
doAssert "a.b" .split(re"\.") .join(",") == "a,b"
|
||||
doAssert "" .split(re"") .len == 0
|
||||
doAssert ":" .split(re"") .len == 1
|
||||
|
||||
block: # start position
|
||||
doAssert "abc".split(re"", start = 1) == @["b", "c"]
|
||||
doAssert "abc".split(re"", start = 2) == @["c"]
|
||||
doAssert "abc".split(re"", start = 3) == newSeq[string]()
|
||||
doAssert "abc".split(re"^b", start = 1) == @["bc"]
|
||||
|
||||
block: # replace
|
||||
block: # replace with 0-length strings
|
||||
doAssert "".replace(re"1", proc (v: RegexMatch): string = "1") == ""
|
||||
doAssert " ".replace(re"", proc (v: RegexMatch): string = "1") == "1 1"
|
||||
doAssert "".replace(re"", proc (v: RegexMatch): string = "1") == "1"
|
||||
|
||||
block: # regular replace
|
||||
doAssert "123".replace(re"\d", "foo") == "foofoofoo"
|
||||
doAssert "123".replace(re"(\d)", "$1$1") == "112233"
|
||||
doAssert "123".replace(re"(\d)(\d)", "$1$2") == "123"
|
||||
doAssert "123".replace(re"(\d)(\d)", "$#$#") == "123"
|
||||
doAssert "abcdefghijklm".replace(re"(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)", "$12") == "l"
|
||||
|
||||
block: # replacing missing captures should throw instead of segfaulting
|
||||
doAssertRaises(ValueError): discard "ab".replace(re"(a)", "$1$2")
|
||||
|
||||
block: # escape strings
|
||||
block: # escape strings
|
||||
doAssert "123".escapeRe() == "123"
|
||||
doAssert "[]".escapeRe() == r"\[\]"
|
||||
doAssert "()".escapeRe() == r"\(\)"
|
||||
3
tests/stdlib/tnre2.nims
Normal file
3
tests/stdlib/tnre2.nims
Normal file
@@ -0,0 +1,3 @@
|
||||
# std/nre2 requires nim-regex and it requires nim-unicodedb
|
||||
exec("nimble --nimbleDir:build/deps install unicodedb@#head")
|
||||
exec("nimble --nimbleDir:build/deps install regex@#head")
|
||||
Reference in New Issue
Block a user