improve runnableExamples and docs for std/nre (#18634)

* improve runnableExamples and docs for std/nre

* avoid too long lines in example
This commit is contained in:
Timothee Cour
2021-08-02 22:22:47 -07:00
committed by GitHub
parent 499d4690fb
commit c0abdaa2b4

View File

@@ -23,12 +23,12 @@ when defined(js):
## **Note**: If you love `sequtils.toSeq` we have bad news for you. This
## library doesn't work with it due to documented compiler limitations. As
## a workaround, use this:
##
## .. code-block:: nim
##
## import std/nre except toSeq
##
##
runnableExamples:
# either `import std/nre except toSeq` or fully qualify `sequtils.toSeq`:
import std/sequtils
iterator iota(n: int): int =
for i in 0..<n: yield i
assert sequtils.toSeq(iota(3)) == @[0, 1, 2]
## Licencing
## ---------
##
@@ -36,33 +36,28 @@ when defined(js):
## this module.
##
## .. _`some additional terms`: http://pcre.sourceforge.net/license.txt
##
runnableExamples:
import std/sugar
let vowels = re"[aeoui]"
let expectedResults = [
1 .. 1,
2 .. 2,
4 .. 4,
6 .. 6,
7 .. 7,
]
var i = 0
for match in "moigagoo".findIter(vowels):
doAssert match.matchBounds == expectedResults[i]
inc i
let bounds = collect:
for match in "moiga".findIter(vowels): match.matchBounds
assert bounds == @[1 .. 1, 2 .. 2, 4 .. 4]
from std/sequtils import toSeq
let s = sequtils.toSeq("moiga".findIter(vowels))
# fully qualified to avoid confusion with nre.toSeq
assert s.len == 3
let firstVowel = "foo".find(vowels)
let hasVowel = firstVowel.isSome()
if hasVowel:
let matchBounds = firstVowel.get().captureBounds[-1]
doAssert matchBounds.a == 1
assert hasVowel
let matchBounds = firstVowel.get().captureBounds[-1]
assert matchBounds.a == 1
## as with module `re`, unless specified otherwise, `start` parameter in each
## proc indicates where the scan starts, but outputs are relative to the start
## of the input string, not to `start`:
doAssert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
doAssert find("uxabc", re"ab", start = 3).isNone
# as with module `re`, unless specified otherwise, `start` parameter in each
# proc indicates where the scan starts, but outputs are relative to the start
# of the input string, not to `start`:
assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
assert find("uxabc", re"ab", start = 3).isNone
from pcre import nil
import nre/private/util
@@ -145,7 +140,7 @@ type
## `DOLLAR_ENDONLY`, `FIRSTLINE`, `NO_AUTO_CAPTURE`,
## `JAVASCRIPT_COMPAT`, `U`, `NO_STUDY`. In other PCRE wrappers, you
## will need to pass these as separate flags to PCRE.
pattern*: string ## not nil
pattern*: string
pcreObj: ptr pcre.Pcre ## not nil
pcreExtra: ptr pcre.ExtraData ## nil
@@ -165,21 +160,12 @@ type
## the string value of whatever was captured at that id. If the value
## is invalid, then behavior is undefined. If the id is `-1`, then
## the whole match is returned. If the given capture was not matched,
## `nil` is returned.
##
## - `"abc".match(re"(\w)").get.captures[0] == "a"`
## - `"abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"`
## - `"abc".match(re"(\w)\w").get.captures[-1] == "ab"`
## `nil` is returned. See examples for `match`.
##
## `captureBounds[]: HSlice[int, int]`
## gets the bounds of the given capture according to the same rules as
## the above. If the capture is not filled, then `None` is returned.
## The bounds are both inclusive.
##
## - `"abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0`
## - `0 in "abc".match(re"(\w)").get.captureBounds == true`
## - `"abc".match(re"").get.captureBounds[-1] == 0 .. -1`
## - `"abc".match(re"abc").get.captureBounds[-1] == 0 .. 2`
## The bounds are both inclusive. See examples for `match`.
##
## `match: string`
## the full text of the match.
@@ -198,7 +184,6 @@ type
pattern*: Regex ## The regex doing the matching.
## Not nil.
str*: string ## The string that was matched against.
## Not nil.
pcreMatchBounds: seq[HSlice[cint, cint]] ## First item is the bounds of the match
## Other items are the captures
## `a` is inclusive start, `b` is exclusive end
@@ -226,18 +211,6 @@ type
## for whatever reason. The message contains the error
## code.
runnableExamples:
# This MUST be kept in sync with the examples in RegexMatch
doAssert "abc".match(re"(\w)").get.captures[0] == "a"
doAssert "abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"
doAssert "abc".match(re"(\w)\w").get.captures[-1] == "ab"
doAssert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
doAssert 0 in "abc".match(re"(\w)").get.captureBounds == true
doAssert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
doAssert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
proc destroyRegex(pattern: Regex) =
pcre.free_substring(cast[cstring](pattern.pcreObj))
if pattern.pcreExtra != nil:
@@ -540,18 +513,27 @@ proc matchImpl(str: string, pattern: Regex, start, endpos: int, flags: int): Opt
proc match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
## Like `find(...)<#find,string,Regex,int>`_, but anchored to the start of the
## string.
##
runnableExamples:
doAssert "foo".match(re"f").isSome
doAssert "foo".match(re"o").isNone
assert "foo".match(re"f").isSome
assert "foo".match(re"o").isNone
assert "abc".match(re"(\w)").get.captures[0] == "a"
assert "abc".match(re"(?<letter>\w)").get.captures["letter"] == "a"
assert "abc".match(re"(\w)\w").get.captures[-1] == "ab"
assert "abc".match(re"(\w)").get.captureBounds[0] == 0 .. 0
assert 0 in "abc".match(re"(\w)").get.captureBounds
assert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
assert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
return str.matchImpl(pattern, start, endpos, pcre.ANCHORED)
iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): RegexMatch =
## Works the same as `find(...)<#find,string,Regex,int>`_, but finds every
## non-overlapping match. `"2222".find(re"22")` is `"22", "22"`, not
## `"22", "22", "22"`.
##
## non-overlapping match:
runnableExamples:
import std/sugar
assert collect(for a in "2222".findIter(re"22"): a.match) == @["22", "22"]
# not @["22", "22", "22"]
## Arguments are the same as `find(...)<#find,string,Regex,int>`_
##
## Variants:
@@ -619,11 +601,10 @@ proc contains*(str: string, pattern: Regex, start = 0, endpos = int.high): bool
## Determine if the string contains the given pattern between the end and
## start positions:
## This function is equivalent to `isSome(str.find(pattern, start, endpos))`.
##
runnableExamples:
doAssert "abc".contains(re"bc")
doAssert not "abc".contains(re"cd")
doAssert not "abc".contains(re"a", start = 1)
assert "abc".contains(re"bc")
assert not "abc".contains(re"cd")
assert not "abc".contains(re"a", start = 1)
return isSome(str.find(pattern, start, endpos))
@@ -635,16 +616,16 @@ proc split*(str: string, pattern: Regex, maxSplit = -1, start = 0): seq[string]
##
runnableExamples:
# - If the match is zero-width, then the string is still split:
doAssert "123".split(re"") == @["1", "2", "3"]
assert "123".split(re"") == @["1", "2", "3"]
# - If the pattern has a capture in it, it is added after the string
# split:
doAssert "12".split(re"(\d)") == @["", "1", "", "2", ""]
assert "12".split(re"(\d)") == @["", "1", "", "2", ""]
# - If `maxsplit != -1`, then the string will only be split
# `maxsplit - 1` times. This means that there will be `maxsplit`
# strings in the output seq.
doAssert "1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"]
assert "1.2.3".split(re"\.", maxsplit = 2) == @["1", "2.3"]
result = @[]
var lastIdx = start
@@ -747,9 +728,9 @@ proc escapeRe*(str: string): string {.gcsafe.} =
##
## Escaped char: `\ + * ? [ ^ ] $ ( ) { } = ! < > | : -`
runnableExamples:
doAssert escapeRe("fly+wind") == "fly\\+wind"
doAssert escapeRe("!") == "\\!"
doAssert escapeRe("nim*") == "nim\\*"
assert escapeRe("fly+wind") == "fly\\+wind"
assert escapeRe("!") == "\\!"
assert escapeRe("nim*") == "nim\\*"
#([\\+*?[^\]$(){}=!<>|:-])
const SpecialCharMatcher = {'\\', '+', '*', '?', '[', '^', ']', '$', '(',