Files
Nim/lib/std/nre2.nim
Tomohiro 8b44b9d9ae fixes #23668; Create a new std/nre2 module using Nim Regex replaces re and nre (#25696)
std/nre2 is implemented using https://github.com/nitely/nim-regex.
std/nre2 has almost same features as std/nre but some regular
expressions supported by std/nre are not supported.
The syntax of regular expressions of Nim Regex is explained in:
https://nitely.github.io/nim-regex/regex.html
2026-04-22 08:06:03 +02:00

345 lines
12 KiB
Nim
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#
# Nim's Runtime Library
# (c) Copyright 2026 Nim Contributors
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## What is NRE2?
## =============
##
## A regular expression library for Nim that replaces the deprecated NRE.
## It is implemented with `Regex <https://github.com/nitely/nim-regex>`_,
## a pure Nim regex engine that guarantees linear-time matching.
## It supports compiling and matching regexes at compile time and
## works with the JS backend.
##
## NRE2 is mostly compatible with NRE, and its regular expression syntax is similar to PCRE.
## However, it lacks a few features, and options are set inside a pattern in a different way.
##
## The syntax of regular expression is explained in https://nitely.github.io/nim-regex/regex.html
runnableExamples:
  import std/sugar
  # Collect the bounds of every vowel found in the input.
  let vowelRe = re"[aeoui]"
  let spans = collect:
    for m in "moiga".findIter(vowelRe):
      m.matchBounds
  assert spans == @[1 .. 1, 2 .. 2, 4 .. 4]

  from std/sequtils import toSeq
  # fully qualified to avoid confusion with nre2.toSeq
  let allMatches = sequtils.toSeq("moiga".findIter(vowelRe))
  assert allMatches.len == 3

  let firstHit = "foo".find(vowelRe)
  assert firstHit.isSome()
  let wholeSpan = firstHit.get().captureBounds[-1]
  assert wholeSpan.a == 1

  # as with module `re`, unless specified otherwise, the `start` parameter of
  # each proc indicates where the scan starts, but outputs are relative to the
  # start of the input string, not to `start`:
  assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
  assert find("uxabc", re"ab", start = 3).isNone
import std/[options, tables]
import regex, regex/nfatype
export options
export regex.RegexFlags, regex.RegexError
type
  Regex* = regex.Regex2
    ## Represents the pattern that things are matched against, constructed with
    ## `re(string)`. Examples: `re"foo"`, `re(r"(?x)foo #comment")`
    ##
    ## `captureCount: int`
    ## : the number of captures that the pattern has.
    ##
    ## `captureNameId: Table[string, int]`
    ## : a table from the capture names to their numeric id.
    ##
    ## The regular expression syntax is explained in
    ## https://nitely.github.io/nim-regex/regex.html
  RegexMatch* = object
    ## Usually seen as `Option[RegexMatch]`, it represents the result of an
    ## execution: `none` on failure, `some` on success.
    ##
    ## `str: string`
    ## : the string that was matched against
    ##
    ## `captures[]: string`
    ## : the text of the capture with the given id or name, obtained by
    ##   slicing `str` with the matching `captureBounds[]` entry. The id `-1`
    ##   yields the whole match. Looking up a named capture that did not
    ##   participate raises `KeyError`. For an integer id the result for a
    ##   capture that did not participate is unspecified here
    ##   (NOTE(review): presumably an empty string - confirm); test with
    ##   `in` first. See examples for `match`.
    ##
    ## `captureBounds[]: HSlice[int, int]`
    ## : the inclusive bounds of the given capture, following the same rules
    ##   as above. For an integer id of a capture that did not participate,
    ##   the engine's `reNonCapture` sentinel is returned unchanged; for a
    ##   name, `KeyError` is raised. See examples for `match`.
    ##
    ## `match: string`
    ## : the full text of the match.
    ##
    ## `matchBounds: HSlice[int, int]`
    ## : the bounds of the match, as in `captureBounds[-1]`
    ##
    ## `(captureBounds|captures).toTable`
    ## : returns a table with each named capture that participated as a key.
    ##
    ## `(captureBounds|captures).toSeq`
    ## : returns all the captures by their number.
    ##
    ## `$: string`
    ## : same as `match`
    str*: string ## The string that was matched against.
    matchImpl: regex.RegexMatch2 # underlying nim-regex match (bounds and groups)
  # Distinct views over a `RegexMatch`; field access is borrowed, so both
  # expose `str` and `matchImpl` of the wrapped match.
  Captures* {.borrow: `.`.} = distinct RegexMatch
  CaptureBounds* {.borrow: `.`.} = distinct RegexMatch
func captureCount*(pattern: Regex): int {.inline.} =
  ## Number of capture groups declared in `pattern`.
  result = toRegex(pattern).groupsCount
func captureNameId*(pattern: Regex): Table[string, int] =
  ## Builds a table mapping each named group of `pattern` to its numeric id.
  let named = pattern.toRegex().namedGroups
  result = initTable[string, int](named.len)
  for groupName, groupId in named:
    result[groupName] = groupId
func captureBounds*(match: RegexMatch): CaptureBounds {.inline.} =
  ## Views `match` as its capture bounds (a plain type conversion).
  result = CaptureBounds(match)
func captures*(match: RegexMatch): Captures {.inline.} =
  ## Views `match` as its captured strings (a plain type conversion).
  result = Captures(match)
func contains*(match: Captures or CaptureBounds, i: int): bool {.inline.} =
  ## Returns `true` if capture `i` took part in the match.
  ##
  ## `-1` denotes the whole match, which is always present, mirroring
  ## `captureBounds[-1]` / `captures[-1]`. Any other id outside
  ## `0 ..< match.len` yields `false`.
  # `-1` must be answered before calling `group`, which indexes the capture
  # list and must not be handed a negative index.
  i == -1 or (i >= 0 and i < match.matchImpl.groupsCount and
    match.matchImpl.group(i) != reNonCapture)
func len*(match: Captures or CaptureBounds): int {.inline.} =
  ## Number of capturing groups recorded in the match.
  result = match.matchImpl.groupsCount
func `[]`*(match: CaptureBounds; i: int): HSlice[int, int] {.inline.} =
  ## Bounds of capture `i`; `-1` selects the bounds of the whole match.
  if i != -1:
    result = match.matchImpl.group(i)
  else:
    result = match.matchImpl.boundaries
func `[]`*(match: CaptureBounds; name: string): HSlice[int, int] {.inline.} =
  ## Bounds of the capture called `name`.
  ##
  ## Raises `KeyError` when the group did not participate in the match.
  let bounds = match.matchImpl.group(name)
  if bounds == reNonCapture:
    raise newException(KeyError, "Group '" & name & "' was not captured")
  bounds
func `[]`*(match: Captures; i: int): string {.inline.} =
  ## Text of capture `i`, i.e. `str` sliced with the capture's bounds.
  let bounds = CaptureBounds(match)[i]
  result = match.str[bounds]
func `[]`*(match: Captures, name: string): string {.inline.} =
  ## Text of the capture called `name`, i.e. `str` sliced with its bounds.
  let bounds = CaptureBounds(match)[name]
  result = match.str[bounds]
func match*(match: RegexMatch): string {.inline.} =
  ## The full text of the match.
  let span = match.matchImpl.boundaries
  result = match.str[span]
func matchBounds*(match: RegexMatch): HSlice[int, int] {.inline.} =
  ## The bounds of the whole match within `match.str`.
  result = match.matchImpl.boundaries
func contains*(match: CaptureBounds or Captures, name: string): bool {.inline.} =
  ## Returns `true` if a group called `name` exists and took part in the match.
  if name notin match.matchImpl.namedGroups:
    return false
  result = match.matchImpl.group(name) != reNonCapture
func toTable*(match: Captures): Table[string, string] =
  ## Maps every named group that took part in the match to its captured text.
  ## Groups that did not participate are left out.
  result = initTable[string, string]()
  for groupName, groupId in match.matchImpl.namedGroups:
    let bounds = match.matchImpl.group(groupId)
    if bounds == reNonCapture:
      continue
    result[groupName] = match.str[bounds]
func toTable*(match: CaptureBounds): Table[string, HSlice[int, int]] =
  ## Maps every named group that took part in the match to its bounds.
  ## Groups that did not participate are left out.
  result = initTable[string, HSlice[int, int]]()
  for groupName, groupId in match.matchImpl.namedGroups:
    let bounds = match.matchImpl.group(groupId)
    if bounds == reNonCapture:
      continue
    result[groupName] = bounds
iterator items*(match: CaptureBounds; default = none(HSlice[int, int])): Option[HSlice[int, int]] =
  ## Yields the bounds of each numbered capture in order, substituting
  ## `default` for groups that did not take part in the match.
  for idx in 0 ..< match.len:
    if idx in match:
      yield some(match[idx])
    else:
      yield default
iterator items*(match: Captures; default = none(string)): Option[string] =
  ## Yields the text of each numbered capture in order, substituting
  ## `default` for groups that did not take part in the match.
  for idx in 0 ..< match.len:
    if idx in match:
      yield some(match[idx])
    else:
      yield default
func toSeq*(match: CaptureBounds;
    default = none(HSlice[int, int])): seq[Option[HSlice[int, int]]] =
  ## Collects the bounds of all numbered captures; groups that did not take
  ## part in the match are represented by `default`.
  result = @[]
  for idx in 0 ..< match.len:
    result.add(if idx in match: some(match[idx]) else: default)
func toSeq*(match: Captures;
    default: Option[string] = none(string)): seq[Option[string]] =
  ## Collects the text of all numbered captures; groups that did not take
  ## part in the match are represented by `default`.
  result = @[]
  for idx in 0 ..< match.len:
    result.add(if idx in match: some(match[idx]) else: default)
func `$`*(match: RegexMatch): string =
  ## Stringifies a match as its full matched text (same as `match`).
  result = match.str[match.matchImpl.boundaries]
func re*(pattern: static string; flags: static RegexFlags = {}): static[Regex2] =
  ## Parses and compiles a regular expression at compile time.
  regex.re2(pattern, flags)
func re*(pattern: string; flags: RegexFlags = {}): Regex =
  ## Parses and compiles a regular expression at run time.
  regex.re2(pattern, flags)
func match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
  ## Like `find`, but the match is anchored at the position where the scan
  ## begins. Only the part of `str` up to and including index `endpos` is
  ## examined.
  runnableExamples:
    assert "foo".match(re"f").isSome
    assert "foo".match(re"o").isNone
    assert "abc".match(re"(\w)").get.captures[0] == "a"
    assert "abc".match(re"(?P<letter>\w)").get.captures["letter"] == "a"
    assert "abc".match(re"(\w)\w").get.captures[-1] == "ab"
    assert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
    assert 0 in "abc".match(re"(\w)").get.captureBounds
    assert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
    assert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
  var found = default(RegexMatch)
  # Clamp the inclusive upper bound to the last valid index of `str`.
  let lastIdx = min(str.high, endpos)
  if not regex.startsWith(str.toOpenArray(0, lastIdx), pattern, found.matchImpl, start):
    return none(RegexMatch)
  found.str = str
  result = some(found)
iterator findIter*(str: string; pattern: Regex; start = 0, endpos = int.high): RegexMatch =
  ## Works the same as `find`, but yields every non-overlapping match.
  ##
  ## Arguments are the same as for `find`. All bounds in the yielded matches
  ## are relative to the start of `str`, not to `start`.
  ##
  ## Variants:
  ##
  ## - `proc findAll(...)` returns a `seq[string]`
  runnableExamples:
    import std/sugar
    assert collect(for a in "2222".findIter(re"22"): a.match) == @["22", "22"]
    # not @["22", "22", "22"]
  var mat = RegexMatch(str: str)
  # `substr` clamps a negative `start` to 0, so the offset must be clamped too.
  let offset = max(start, 0)
  # TODO:
  # needs following PR to remove `substr` call.
  # https://github.com/nitely/nim-regex/pull/162
  for m in regex.findAll(str.substr(start, endpos), pattern):
    mat.matchImpl = m
    if offset != 0:
      # The engine reports bounds relative to the substring; translate them
      # back to indices into `str`. Groups that did not participate keep the
      # `reNonCapture` sentinel so that `contains` still recognises them.
      for v in mat.matchImpl.captures.mitems:
        if v != reNonCapture:
          v.a += offset
          v.b += offset
      mat.matchImpl.boundaries.a += offset
      mat.matchImpl.boundaries.b += offset
    yield mat
proc find*(str: string; pattern: Regex; start = 0; endpos = int.high): Option[RegexMatch] =
  ## Finds the given pattern in the string between the start and end
  ## positions.
  ##
  ## `start`
  ## : The start point at which to start matching. `|abc` is `0`;
  ##   `a|bc` is `1`
  ##
  ## `endpos`
  ## : The maximum index for a match; `int.high` means the end of the
  ##   string, otherwise it is an inclusive upper bound.
  ##
  ## Returns `none` when nothing matches. On success all bounds in the result
  ## are relative to the start of `str`, not to `start`.
  var mat = default(RegexMatch)
  if not regex.find(str.substr(start, endpos), pattern, mat.matchImpl):
    return none(RegexMatch)
  # remove following code after regex.find get `start`/`last` parameter
  # `substr` clamps a negative `start` to 0, so the offset must be clamped too.
  let offset = max(start, 0)
  if offset != 0:
    for v in mat.matchImpl.captures.mitems:
      # Leave the `reNonCapture` sentinel untouched; shifting it would make
      # an unmatched group look like a captured one.
      if v != reNonCapture:
        v.a += offset
        v.b += offset
    mat.matchImpl.boundaries.a += offset
    mat.matchImpl.boundaries.b += offset
  mat.str = str
  result = some(mat)
proc findAll*(str: string; pattern: Regex; start = 0; endpos = int.high): seq[string] =
  ## Returns the text of every non-overlapping match produced by `findIter`
  ## for the same arguments.
  result = newSeq[string]()
  for found in findIter(str, pattern, start, endpos):
    result.add found.match
proc contains*(str: string; pattern: Regex; start = 0; endpos = int.high): bool =
  ## Tells whether the string contains the given pattern between the start
  ## and end positions.
  ## This is equivalent to `isSome(str.find(pattern, start, endpos))`.
  runnableExamples:
    assert "abc".contains(re"bc")
    assert not "abc".contains(re"cd")
    assert not "abc".contains(re"a", start = 1)
  let found = find(str, pattern, start, endpos)
  result = found.isSome
proc split*(str: string; pattern: Regex; maxSplit = -1; start = 0): seq[string] =
  ## Splits the string on every match of the given regex, following the
  ## rules that Perl and Javascript use. The work is delegated to
  ## `splitIncl`.
  ##
  ## `start` behaves the same as in `find`.
  ##
  runnableExamples:
    # - If the match is zero-width, then the string is still split:
    assert "123".split(re"") == @["1", "2", "3"]
    # - If the pattern has a capture in it, it is added after the string
    #   split:
    assert "12".split(re"(\d)") == @["", "1", "", "2", ""]
    # - If `maxsplit != -1`, then the string will only be split
    #   `maxsplit - 1` times. This means that there will be `maxsplit`
    #   strings in the output seq.
    assert "1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"]
  splitIncl(str, pattern, maxSplit, start)
proc replace*(str: string; pattern: Regex;
    subproc: proc (match: RegexMatch): string): string =
  ## Replaces each match of Regex in the string with `subproc`, which should
  ## never be or return `nil`.
  ##
  ## If `subproc` is a `proc (RegexMatch): string`, then it is executed with
  ## each match and the return value is the replacement value.
  ##
  ## If `subproc` is a `proc (string): string`, then it is executed with the
  ## full text of the match and the return value is the replacement value.
  ##
  ## If `subproc` is a string, the syntax is as follows:
  ##
  ## - `$$` - literal `$`
  ## - `$123` - capture number `123`
  ## - `$1$#` - first and second captures
  ## - `$#` - first capture
  ##
  ## Following syntax is not supported in NRE2
  ##
  ## - `$foo` - named capture `foo`
  ## - `${foo}` - same as above
  ## - `$0` - full match
  ##
  ## If a given capture is missing, `ValueError` is thrown.
  # Adapter: wrap the engine's match together with the string handed to the
  # callback into this module's `RegexMatch` before calling `subproc`.
  proc by(m: RegexMatch2, s: string): string =
    subproc(RegexMatch(str: s, matchImpl: m))
  regex.replace(str, pattern, by)
proc replace*(str: string; pattern: Regex;
    subproc: proc (match: string): string): string =
  ## Replaces each match of `pattern` in `str` with the value returned by
  ## `subproc`, which is called with the full text of that match.
  proc by(m: RegexMatch2; s: string): string =
    # `s` is the whole input string (the `RegexMatch` overload stores it as
    # `str`), so slice out the matched text before handing it to `subproc`.
    subproc(s[m.boundaries])
  result = regex.replace(str, pattern, by)
proc replace*(str: string; pattern: Regex; sub: string): string =
  ## Replaces each match of `pattern` in `str` using the substitution string
  ## `sub`; the work is delegated to `regex.replace`.
  regex.replace(str, pattern, sub)
func escapeRe*(str: string): string =
  ## Escapes `str` so that none of its characters are treated as special
  ## regex characters; delegates to `regex.escapeRe`.
  runnableExamples:
    assert escapeRe("fly+wind") == "fly\\+wind"
    assert escapeRe("nim*") == "nim\\*"
  regex.escapeRe(str)