test pcre2

This commit is contained in:
ringabout
2026-05-27 19:34:03 +08:00
parent 8771451701
commit b85c6bc9ec
10 changed files with 570 additions and 265 deletions

View File

@@ -49,7 +49,7 @@ jobs:
DEBIAN_FRONTEND='noninteractive' \
sudo apt-get install --no-install-recommends -yq \
libcurl4-openssl-dev libgc-dev libsdl1.2-dev libsfml-dev \
valgrind libc6-dbg libblas-dev liblapack-dev libpcre3 xorg-dev
valgrind libc6-dbg libblas-dev liblapack-dev libpcre2-dev xorg-dev
- name: 'Install dependencies (macOS)'
if: runner.os == 'macOS'
run: brew install boehmgc make sfml gtk+3

View File

@@ -78,8 +78,8 @@ parameter and result types, not just their source-level shape. Use
- `min`, `max`, and `sequtils`' `minIndex`, `maxIndex` and `minmax` for `openArray`s now accept a comparison function.
- `system.substr` implementation now uses `copymem` (wrapped C `memcpy`) for copying data, if available at compilation.
- `system.newStringUninit` is now considered free of side-effects allowing it to be used with `--experimental:strictFuncs`.
- `std/re` and `std/nre` are deprecated as PCRE library is obsolete.
Use https://github.com/nitely/nim-regex or `std/nre2`.
- `std/re` and `std/nre` now use PCRE2. They remain deprecated;
use https://github.com/nitely/nim-regex or `std/nre2`.
See: https://github.com/nim-lang/Nim/issues/23668.
- `std/pegs` now correctly lexes UTF-8 bytes inside bare identifier-style
terminals, so case-insensitive matching of non-ASCII terms (e.g. ``\i café``)

View File

@@ -596,12 +596,12 @@ Regular expressions
* [re](re.html)
Procedures and operators for handling regular
expressions. The current implementation uses PCRE.
expressions. The current implementation uses PCRE2.
* [nre](nre.html)
Many help functions for handling regular expressions.
The current implementation uses PCRE.
The current implementation uses PCRE2.
Database support
----------------
@@ -661,6 +661,9 @@ Regular expressions
* [pcre](pcre.html)
Wrapper for the PCRE library.
* [pcre2](pcre2.html)
Wrapper for the PCRE2 library.
Database support
----------------

View File

@@ -86,7 +86,7 @@ That means you can always use only 1 such an option with logical OR, e.g.
Meaning of `^`:literal: and `$`:literal:
========================================
`nimgrep`:cmd: PCRE engine is run in a single-line mode so
`nimgrep`:cmd: PCRE2 engine is run in a single-line mode so
`^`:literal: matches the beginning of whole input *file* and
`$`:literal: matches the end of *file* (or whole input *string* for
options like `--filename`).
@@ -97,7 +97,7 @@ Add the `(?m)`:literal: modifier to the beginning of your pattern for
Examples
========
All examples below use default PCRE Regex patterns:
All examples below use default PCRE2 Regex patterns:
+ To search recursively in Nim files using style-insensitive identifiers:

View File

@@ -7,21 +7,21 @@
#
when defined(js):
{.error: "This library needs to be compiled with a c-like backend, and depends on PCRE; See jsre for JS backend.".}
{.error: "This library needs to be compiled with a c-like backend, and depends on PCRE2; See jsre for JS backend.".}
## .. warning:: NRE is deprecated.
## Use [Regex](https://github.com/nitely/nim-regex) or
## `NRE2 <nre2.html>`_ that wraps Regex so that you can easily replace NRE.
## PCRE library is now at end of life.
## This compatibility module uses PCRE2.
##
## What is NRE?
## ============
##
## A regular expression library for Nim using PCRE to do the hard work.
## A regular expression library for Nim using PCRE2 to do the hard work.
##
## For documentation on how to write patterns, there exists `the official PCRE
## For documentation on how to write patterns, there exists `the official PCRE2
## pattern documentation
## <https://www.pcre.org/original/doc/html/pcrepattern.html>`_. You can also
## <https://www.pcre.org/current/doc/html/pcre2pattern.html>`_. You can also
## search the internet for a wide variety of third-party documentation and
## tools.
##
@@ -39,10 +39,8 @@ runnableExamples:
## Licencing
## ---------
##
## PCRE has `some additional terms`_ that you must agree to in order to use
## this module.
## PCRE2 is distributed under a BSD-style licence.
##
## .. _`some additional terms`: https://pcre.sourceforge.net/license.txt
runnableExamples:
import std/sugar
let vowels = re"[aeoui]"
@@ -66,7 +64,7 @@ runnableExamples:
assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
assert find("uxabc", re"ab", start = 3).isNone
from std/pcre import nil
import std/pcre2 as pcre
import nre/private/util
import std/tables
from std/strutils import `%`
@@ -82,7 +80,6 @@ type
RegexDesc* = object
pattern*: string
pcreObj: ptr pcre.Pcre ## not nil
pcreExtra: ptr pcre.ExtraData ## nil
captureNameToId: Table[string, int]
@@ -93,9 +90,9 @@ type
##
## `pattern: string`
## : the string that was used to create the pattern. For details on how
## to write a pattern, please see `the official PCRE pattern
## to write a pattern, please see `the official PCRE2 pattern
## documentation.
## <https://www.pcre.org/original/doc/html/pcrepattern.html>`_
## <https://www.pcre.org/current/doc/html/pcre2pattern.html>`_
##
## `captureCount: int`
## : the number of captures that the pattern has.
@@ -140,23 +137,23 @@ type
## NEL (next line, U+0085), LS (line separator, U+2028), and PS
## (paragraph separator, U+2029). For the 8-bit library, the last two
## are recognized only in UTF-8 mode.
## man pcre
## -- man pcre2pattern
##
## - `(*JAVASCRIPT_COMPAT)` - JavaScript compatibility
## - `(*NO_STUDY)` - skip the JIT compilation step (the PCRE2 counterpart of PCRE's study); JIT compilation is attempted by default
##
## For more details on the leading option groups, see the `Option
## Setting <https://man7.org/linux/man-pages/man3/pcresyntax.3.html#OPTION_SETTING>`_
## Setting <https://www.pcre.org/current/doc/html/pcre2syntax.html#SEC16>`_
## and the `Newline
## Convention <https://man7.org/linux/man-pages/man3/pcresyntax.3.html#NEWLINE_CONVENTION>`_
## sections of the `PCRE syntax
## manual <https://man7.org/linux/man-pages/man3/pcresyntax.3.html>`_.
## Convention <https://www.pcre.org/current/doc/html/pcre2syntax.html#SEC17>`_
## sections of the `PCRE2 syntax
## manual <https://www.pcre.org/current/doc/html/pcre2syntax.html>`_.
##
## Some of these options are not part of PCRE and are converted by nre
## into PCRE flags. These include `NEVER_UTF`, `ANCHORED`,
## Some of these options are not part of a pattern and are converted by nre
## into PCRE2 flags. These include `NEVER_UTF`, `ANCHORED`,
## `DOLLAR_ENDONLY`, `FIRSTLINE`, `NO_AUTO_CAPTURE`,
## `JAVASCRIPT_COMPAT`, `U`, `NO_STUDY`. In other PCRE wrappers, you
## will need to pass these as separate flags to PCRE.
## `JAVASCRIPT_COMPAT`, `U`, `NO_STUDY`. In other PCRE2 wrappers, you
## will need to pass these as separate flags to PCRE2.
RegexMatch* = object
## Usually seen as `Option[RegexMatch]`, it represents the result of an
@@ -196,7 +193,7 @@ type
pattern*: Regex ## The regex doing the matching.
## Not nil.
str*: string ## The string that was matched against.
pcreMatchBounds: seq[HSlice[cint, cint]] ## First item is the bounds of the match
pcreMatchBounds: seq[HSlice[csize_t, csize_t]] ## First item is the bounds of the match
## Other items are the captures
## `a` is inclusive start, `b` is exclusive end
@@ -227,38 +224,32 @@ when defined(gcDestructors):
when defined(nimAllowNonVarDestructor) and defined(nimPreviewNonVarDestructor):
proc `=destroy`(pattern: RegexDesc) =
`=destroy`(pattern.pattern)
pcre.free_substring(cast[cstring](pattern.pcreObj))
if pattern.pcreExtra != nil:
pcre.free_study(pattern.pcreExtra)
pcre.code_free(pattern.pcreObj)
`=destroy`(pattern.captureNameToId)
else:
proc `=destroy`(pattern: var RegexDesc) =
`=destroy`(pattern.pattern)
pcre.free_substring(cast[cstring](pattern.pcreObj))
if pattern.pcreExtra != nil:
pcre.free_study(pattern.pcreExtra)
pcre.code_free(pattern.pcreObj)
`=destroy`(pattern.captureNameToId)
else:
proc destroyRegex(pattern: Regex) =
`=destroy`(pattern.pattern)
pcre.free_substring(cast[cstring](pattern.pcreObj))
if pattern.pcreExtra != nil:
pcre.free_study(pattern.pcreExtra)
pcre.code_free(pattern.pcreObj)
`=destroy`(pattern.captureNameToId)
proc getinfo[T](pattern: Regex, opt: cint): T =
proc getinfo[T](pattern: Regex, opt: uint32): T =
result = default(T)
let retcode = pcre.fullinfo(pattern.pcreObj, pattern.pcreExtra, opt, addr result)
let retcode = pcre.pattern_info(pattern.pcreObj, opt, addr result)
if retcode < 0:
# XXX Error message that doesn't expose implementation details
raise newException(FieldDefect, "Invalid getinfo for $1, errno $2" % [$opt, $retcode])
proc getNameToNumberTable(pattern: Regex): Table[string, int] =
let entryCount = getinfo[cint](pattern, pcre.INFO_NAMECOUNT)
let entrySize = getinfo[cint](pattern, pcre.INFO_NAMEENTRYSIZE)
let entryCount = getinfo[uint32](pattern, pcre.INFO_NAMECOUNT).int
let entrySize = getinfo[uint32](pattern, pcre.INFO_NAMEENTRYSIZE).int
let table = cast[ptr UncheckedArray[uint8]](
getinfo[int](pattern, pcre.INFO_NAMETABLE))
getinfo[pointer](pattern, pcre.INFO_NAMETABLE))
result = initTable[string, int]()
@@ -274,61 +265,69 @@ proc getNameToNumberTable(pattern: Regex): Table[string, int] =
result[name] = num
proc initRegex(pattern: string, flags: int, study = true): Regex =
proc pcreErrorMessage(errorCode: cint): string =
  ## Turns a PCRE2 error code into text by asking the library to write the
  ## message into a fixed 256-byte scratch buffer.
  ##
  ## A non-negative return from `get_error_message` is treated as the number
  ## of bytes written and those bytes become the result. A negative return
  ## means no usable text was produced, so the numeric code is stringified
  ## instead.
  var msgBuf: array[256, uint8]
  let written = pcre.get_error_message(errorCode, addr msgBuf[0],
                                       msgBuf.len.csize_t)
  if written < 0:
    return $errorCode
  result = newString(written)
  # Guard the copy: `result[0]` does not exist for an empty message.
  if written > 0:
    copyMem(addr result[0], addr msgBuf[0], written)
proc jitCompile(pattern: ptr pcre.Pcre) =
  ## Best-effort JIT compilation of an already compiled pattern.
  ##
  ## Nothing happens unless the `CONFIG_JIT` query succeeds (returns 0) and
  ## reports 1. The outcome of `jit_compile` itself is deliberately discarded,
  ## so a failed JIT attempt never surfaces as an error.
  var jitAvailable: cint = 0
  if pcre.config(pcre.CONFIG_JIT, addr jitAvailable) != 0:
    return
  if jitAvailable != 1:
    return
  discard pcre.jit_compile(pattern, pcre.JIT_COMPLETE.uint32)
proc initRegex(pattern: string, flags: uint32, study = true): Regex =
when defined(gcDestructors):
result = Regex()
else:
new(result, destroyRegex)
result.pattern = pattern
var errorMsg: cstring = ""
var errOffset: cint = 0
var
errorCode: cint = 0
errOffset: csize_t = 0
result.pcreObj = pcre.compile(cstring(pattern),
# better hope int is at least 4 bytes..
cint(flags), addr errorMsg,
result.pcreObj = pcre.compile(cast[ptr uint8](cstring(pattern)),
pattern.len.csize_t, flags, addr errorCode,
addr errOffset, nil)
if result.pcreObj == nil:
# failed to compile
raise SyntaxError(msg: $errorMsg, pos: errOffset, pattern: pattern)
raise SyntaxError(msg: pcreErrorMessage(errorCode), pos: errOffset.int,
pattern: pattern)
if study:
var options: cint = 0
var hasJit: cint = cint(0)
if pcre.config(pcre.CONFIG_JIT, addr hasJit) == 0:
if hasJit == 1'i32:
options = pcre.STUDY_JIT_COMPILE
result.pcreExtra = pcre.study(result.pcreObj, options, addr errorMsg)
if errorMsg != nil:
raise StudyError(msg: $errorMsg)
jitCompile(result.pcreObj)
result.captureNameToId = result.getNameToNumberTable()
proc captureCount*(pattern: Regex): int =
return getinfo[cint](pattern, pcre.INFO_CAPTURECOUNT)
return getinfo[uint32](pattern, pcre.INFO_CAPTURECOUNT).int
proc captureNameId*(pattern: Regex): Table[string, int] =
return pattern.captureNameToId
proc matchesCrLf(pattern: Regex): bool =
let flags = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS))
let newlineFlags = flags and (pcre.NEWLINE_CRLF or
pcre.NEWLINE_ANY or
pcre.NEWLINE_ANYCRLF)
if newlineFlags > 0u32:
let newline = getinfo[uint32](pattern, pcre.INFO_NEWLINE)
case newline
of pcre.NEWLINE_CRLF, pcre.NEWLINE_ANY, pcre.NEWLINE_ANYCRLF:
return true
of pcre.NEWLINE_CR, pcre.NEWLINE_LF, pcre.NEWLINE_NUL:
return false
else:
discard
# get flags from build config
var confFlags: cint = cint(0)
var confFlags: uint32 = 0
if pcre.config(pcre.CONFIG_NEWLINE, addr confFlags) != 0:
assert(false, "CONFIG_NEWLINE apparently got screwed up")
case confFlags
of 13: return false
of 10: return false
of (13 shl 8) or 10: return true
of -2: return true
of -1: return true
of pcre.NEWLINE_CR, pcre.NEWLINE_LF, pcre.NEWLINE_NUL: return false
of pcre.NEWLINE_CRLF, pcre.NEWLINE_ANY, pcre.NEWLINE_ANYCRLF: return true
else: return false
@@ -338,7 +337,9 @@ func captures*(pattern: RegexMatch): Captures = return Captures(pattern)
func contains*(pattern: CaptureBounds, i: int): bool =
let pattern = RegexMatch(pattern)
pattern.pcreMatchBounds[i + 1].a != -1
let index = i + 1
index >= 0 and index < pattern.pcreMatchBounds.len and
pattern.pcreMatchBounds[index].a != pcre.UNSET
func contains*(pattern: Captures, i: int): bool =
i in CaptureBounds(pattern)
@@ -349,7 +350,7 @@ func `[]`*(pattern: CaptureBounds, i: int): HSlice[int, int] =
raise newException(IndexDefect, "Group '" & $i & "' was not captured")
let bounds = pattern.pcreMatchBounds[i + 1]
int(bounds.a)..int(bounds.b-1)
int(bounds.a) .. (int(bounds.b) - 1)
func `[]`*(pattern: Captures, i: int): string =
let pattern = RegexMatch(pattern)
@@ -437,8 +438,7 @@ proc `$`*(pattern: RegexMatch): string =
proc `==`*(a, b: Regex): bool =
if not a.isNil and not b.isNil:
return a.pattern == b.pattern and
a.pcreObj == b.pcreObj and
a.pcreExtra == b.pcreExtra
a.pcreObj == b.pcreObj
else:
return system.`==`(a, b)
@@ -453,7 +453,7 @@ const PcreOptions = {
"FIRSTLINE": pcre.FIRSTLINE,
"NO_AUTO_CAPTURE": pcre.NO_AUTO_CAPTURE,
"JAVASCRIPT_COMPAT": pcre.JAVASCRIPT_COMPAT,
"U": pcre.UTF8 or pcre.UCP
"U": pcre.UTF or pcre.UCP
}.toTable
# Options that are supported inside regular expressions themselves
@@ -503,46 +503,63 @@ proc extractOptions(pattern: string): tuple[pattern: string, flags: int, study:
proc re*(pattern: string): Regex =
let (pattern, flags, study) = extractOptions(pattern)
initRegex(pattern, flags, study)
initRegex(pattern, cast[uint32](flags), study)
proc matchImpl(str: string, pattern: Regex, start, endpos: int, flags: int): Option[RegexMatch] =
func isInvalidUnicodeError(errorCode: cint): bool =
  ## Reports whether `errorCode` is one of the match errors this module maps
  ## to `InvalidUnicodeError`: `ERROR_BADUTFOFFSET`, `ERROR_DFA_UINVALID_UTF`,
  ## or any code from `ERROR_UTF8_ERR21` up to `ERROR_UTF8_ERR1` inclusive.
  if errorCode == pcre.ERROR_BADUTFOFFSET or
      errorCode == pcre.ERROR_DFA_UINVALID_UTF:
    return true
  errorCode >= pcre.ERROR_UTF8_ERR21 and errorCode <= pcre.ERROR_UTF8_ERR1
proc newMatchData(pattern: Regex): ptr pcre.MatchData =
  ## Allocates a match-data block derived from `pattern`'s compiled object.
  ##
  ## Raises `RegexInternalError` when the library hands back `nil`. The
  ## caller owns the returned block and must release it.
  let matchData = pcre.match_data_create_from_pattern(pattern.pcreObj, nil)
  if matchData.isNil:
    raise RegexInternalError(msg: "could not allocate PCRE2 match data")
  matchData
proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32): Option[RegexMatch] =
var myResult = RegexMatch(pattern: pattern, str: str)
# See PCRE man pages.
# 2x capture count to make room for start-end pairs
# 1x capture count as slack space for PCRE
let vecsize = (pattern.captureCount() + 1) * 3
# div 2 because each element is 2 cints long
# plus 1 because we need the ceiling, not the floor
myResult.pcreMatchBounds = newSeq[HSlice[cint, cint]]((vecsize + 1) div 2)
myResult.pcreMatchBounds.setLen(vecsize div 3)
myResult.pcreMatchBounds = newSeq[HSlice[csize_t, csize_t]](pattern.captureCount() + 1)
let strlen = if endpos == int.high: str.len else: endpos+1
doAssert(strlen <= str.len) # don't want buffer overflows
if start < 0 or start > strlen:
return none(RegexMatch)
let matchData = newMatchData(pattern)
defer: pcre.match_data_free(matchData)
let execRet = pcre.match(pattern.pcreObj,
cast[ptr uint8](cstring(str)),
strlen.csize_t,
start.csize_t,
options,
matchData,
nil)
let rawMatches = cast[ptr UncheckedArray[csize_t]](pcre.get_ovector_pointer(matchData))
let ovectorCount = min(myResult.pcreMatchBounds.len,
pcre.get_ovector_count(matchData).int)
for i in 0 ..< ovectorCount:
myResult.pcreMatchBounds[i] = rawMatches[i * 2] .. rawMatches[i * 2 + 1]
let execRet = pcre.exec(pattern.pcreObj,
pattern.pcreExtra,
cstring(str),
cint(strlen),
cint(start),
cint(flags),
cast[ptr cint](addr myResult.pcreMatchBounds[0]),
cint(vecsize))
if execRet >= 0:
return some(myResult)
case execRet:
of pcre.ERROR_NOMATCH:
return none(RegexMatch)
of pcre.ERROR_NULL:
raise newException(AccessViolationDefect, "Expected non-null parameters")
of pcre.ERROR_BADOPTION:
raise RegexInternalError(msg: "Unknown pattern flag. Either a bug or " &
"outdated PCRE.")
of pcre.ERROR_BADUTF8, pcre.ERROR_SHORTUTF8, pcre.ERROR_BADUTF8_OFFSET:
raise InvalidUnicodeError(msg: "Invalid unicode byte sequence",
pos: myResult.pcreMatchBounds[0].a)
if isInvalidUnicodeError(execRet):
let errorPos = if myResult.pcreMatchBounds.len > 0 and
myResult.pcreMatchBounds[0].a != pcre.UNSET:
myResult.pcreMatchBounds[0].a.int
else:
raise RegexInternalError(msg: "Unknown internal error: " & $execRet)
start
raise InvalidUnicodeError(msg: "Invalid unicode byte sequence", pos: errorPos)
case execRet
of pcre.ERROR_NOMATCH:
return none(RegexMatch)
of pcre.ERROR_NULL:
raise newException(AccessViolationDefect, "Expected non-null parameters")
of pcre.ERROR_BADOPTION:
raise RegexInternalError(msg: "Unknown pattern flag. Either a bug or " &
"outdated PCRE2.")
else:
raise RegexInternalError(msg: "Unknown internal error: " & $execRet)
proc match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
## Like `find(...)<#find,string,Regex,int>`_, but anchored to the start of the
@@ -559,7 +576,7 @@ proc match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[R
assert 0 in "abc".match(re"(\w)").get.captureBounds
assert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
assert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
return str.matchImpl(pattern, start, endpos, pcre.ANCHORED)
return str.matchImpl(pattern, start, endpos, cast[uint32](pcre.ANCHORED))
iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): RegexMatch =
## Works the same as `find(...)<#find,string,Regex,int>`_, but finds every
@@ -573,21 +590,21 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
## Variants:
##
## - `proc findAll(...)` returns a `seq[string]`
# see pcredemo for explanation => https://www.pcre.org/original/doc/html/pcredemo.html
# see pcre2demo for explanation => https://www.pcre.org/current/doc/html/pcre2demo.html
let matchesCrLf = pattern.matchesCrLf()
let unicode = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS) and
pcre.UTF8) > 0u32
let unicode = uint32(getinfo[uint32](pattern, pcre.INFO_ALLOPTIONS) and
pcre.UTF.uint32) > 0u32
let strlen = if endpos == int.high: str.len else: endpos+1
var offset = start
var match: Option[RegexMatch] = default(Option[RegexMatch])
var neverMatched = true
while true:
var flags = 0
var flags = 0'u32
if match.isSome and
match.get.matchBounds.a > match.get.matchBounds.b:
# 0-len match
flags = pcre.NOTEMPTY_ATSTART
flags = pcre.NOTEMPTY_ATSTART.uint32
match = str.matchImpl(pattern, offset, endpos, flags)
if match.isNone:
@@ -623,7 +640,7 @@ proc find*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[Re
## `endpos`
## : The maximum index for a match; `int.high` means the end of the
## string, otherwise it's an inclusive upper bound.
return str.matchImpl(pattern, start, endpos, 0)
return str.matchImpl(pattern, start, endpos, 0'u32)
proc findAll*(str: string, pattern: Regex, start = 0, endpos = int.high): seq[string] =
result = @[]

View File

@@ -8,27 +8,25 @@
#
when defined(js):
{.error: "This library needs to be compiled with a c-like backend, and depends on PCRE; See jsre for JS backend.".}
{.error: "This library needs to be compiled with a c-like backend, and depends on PCRE2; See jsre for JS backend.".}
## .. warning:: This module is deprecated.
## Use [Regex](https://github.com/nitely/nim-regex).
## PCRE library is now at end of life.
## This compatibility module uses PCRE2.
##
## Regular expression support for Nim.
##
## This module is implemented by providing a wrapper around the
## `PCRE (Perl-Compatible Regular Expressions) <https://www.pcre.org>`_
## C library. This means that your application will depend on the PCRE
## `PCRE2 (Perl-Compatible Regular Expressions) <https://www.pcre.org>`_
## C library. This means that your application will depend on the PCRE2
## library's licence when using this module, which should not be a problem
## though.
##
## .. note:: There are also alternative nimble packages such as [tinyre](https://github.com/khchen/tinyre)
## and [regex](https://github.com/nitely/nim-regex).
##
## PCRE's licence follows:
##
## .. include:: ../../doc/regexprs.txt
##
## PCRE2 is distributed under a BSD-style licence.
runnableExamples:
## Unless specified otherwise, `start` parameter in each proc indicates
@@ -40,7 +38,7 @@ runnableExamples:
# can't match start of string since we're starting at 1
import
std/[pcre, strutils, rtarrays]
std/[pcre2, strutils]
when defined(nimPreviewSlimSystem):
import std/syncio
@@ -60,8 +58,7 @@ type
## expression will be used only once)
RegexDesc = object
h: ptr Pcre
e: ptr ExtraData
h: ptr pcre2.Pcre
Regex* = ref RegexDesc ## a compiled regular expression
@@ -71,14 +68,10 @@ type
when defined(gcDestructors):
when defined(nimAllowNonVarDestructor):
proc `=destroy`(x: RegexDesc) =
pcre.free_substring(cast[cstring](x.h))
if not isNil(x.e):
pcre.free_study(x.e)
pcre2.code_free(x.h)
else:
proc `=destroy`(x: var RegexDesc) =
pcre.free_substring(cast[cstring](x.h))
if not isNil(x.e):
pcre.free_study(x.e)
pcre2.code_free(x.h)
proc raiseInvalidRegex(msg: string) {.noinline, noreturn.} =
var e: ref RegexError
@@ -86,21 +79,43 @@ proc raiseInvalidRegex(msg: string) {.noinline, noreturn.} =
e.msg = msg
raise e
proc rawCompile(pattern: string, flags: cint): ptr Pcre =
proc pcre2ErrorMessage(errorCode: cint): string =
  ## Turns a PCRE2 error code into text by asking the library to write the
  ## message into a fixed 256-byte scratch buffer.
  ##
  ## A non-negative return from `get_error_message` is treated as the number
  ## of bytes written and those bytes become the result. A negative return
  ## means no usable text was produced, so the numeric code is stringified
  ## instead.
  var msgBuf: array[256, uint8]
  let written = pcre2.get_error_message(errorCode, addr msgBuf[0],
                                        msgBuf.len.csize_t)
  if written < 0:
    return $errorCode
  result = newString(written)
  # Guard the copy: `result[0]` does not exist for an empty message.
  if written > 0:
    copyMem(addr result[0], addr msgBuf[0], written)
proc rawCompile(pattern: string, options: uint32): ptr pcre2.Pcre =
var
msg: cstring = ""
offset: cint = 0
result = pcre.compile(pattern, flags, addr(msg), addr(offset), nil)
errorCode: cint = 0
offset: csize_t = 0
result = pcre2.compile(cast[ptr uint8](pattern.cstring), pattern.len.csize_t,
options, addr errorCode, addr offset, nil)
if result == nil:
raiseInvalidRegex($msg & "\n" & pattern & "\n" & spaces(offset) & "^\n")
raiseInvalidRegex(pcre2ErrorMessage(errorCode) & "\n" & pattern & "\n" &
spaces(offset.int) & "^\n")
proc finalizeRegEx(x: Regex) =
# XXX This is a hack, but PCRE does not export its "free" function properly.
# Sigh. The hack relies on PCRE's implementation (see `pcre_get.c`).
# Fortunately the implementation is unlikely to change.
pcre.free_substring(cast[cstring](x.h))
if not isNil(x.e):
pcre.free_study(x.e)
pcre2.code_free(x.h)
func toPcre2Options(flags: set[RegexFlag]): uint32 =
  ## Translates the module's `RegexFlag` set into a PCRE2 compile-option
  ## bit mask.
  ##
  ## Only `reIgnoreCase`, `reMultiLine`, `reDotAll` and `reExtended`
  ## contribute a bit; every other flag (for example `reStudy`) is ignored
  ## here. An empty set yields 0.
  for flag in flags:
    case flag
    of reIgnoreCase:
      result = result or pcre2.CASELESS.uint32
    of reMultiLine:
      result = result or pcre2.MULTILINE.uint32
    of reDotAll:
      result = result or pcre2.DOTALL.uint32
    of reExtended:
      result = result or pcre2.EXTENDED.uint32
    else:
      discard
proc jitCompile(pattern: ptr pcre2.Pcre) =
  ## Best-effort JIT compilation of an already compiled pattern.
  ##
  ## Nothing happens unless the `CONFIG_JIT` query succeeds (returns 0) and
  ## reports 1. The outcome of `jit_compile` itself is deliberately discarded,
  ## so a failed JIT attempt never surfaces as an error.
  var jitAvailable: cint = 0
  if pcre2.config(pcre2.CONFIG_JIT, addr jitAvailable) != 0:
    return
  if jitAvailable != 1:
    return
  discard pcre2.jit_compile(pattern, pcre2.JIT_COMPLETE.uint32)
proc re*(s: string, flags = {reStudy}): Regex =
## Constructor of regular expressions.
@@ -116,16 +131,9 @@ proc re*(s: string, flags = {reStudy}): Regex =
result = Regex()
else:
new(result, finalizeRegEx)
result.h = rawCompile(s, cast[cint](flags - {reStudy}))
result.h = rawCompile(s, toPcre2Options(flags))
if reStudy in flags:
var msg: cstring = ""
var options: cint = 0
var hasJit: cint = 0
if pcre.config(pcre.CONFIG_JIT, addr hasJit) == 0:
if hasJit == 1'i32:
options = pcre.STUDY_JIT_COMPILE
result.e = pcre.study(result.h, options, addr msg)
if not isNil(msg): raiseInvalidRegex($msg)
jitCompile(result.h)
proc rex*(s: string, flags = {reStudy, reExtended}): Regex =
## Constructor for extended regular expressions.
@@ -142,25 +150,58 @@ proc bufSubstr(b: cstring, sPos, ePos: int): string {.inline.} =
copyMem(addr(result[0]), unsafeAddr(b[sPos]), sz)
result.setLen(sz)
proc matchOrFind(buf: cstring, pattern: Regex, matches: var openArray[string],
start, bufSize, flags: cint): cint =
var
rtarray = initRtArray[cint]((matches.len+1)*3)
rawMatches = rtarray.getRawData
res = pcre.exec(pattern.h, pattern.e, buf, bufSize, start, flags,
cast[ptr cint](rawMatches), (matches.len+1).cint*3)
if res < 0'i32: return res
for i in 1..int(res)-1:
var a = rawMatches[i * 2]
var b = rawMatches[i * 2 + 1]
if a >= 0'i32:
matches[i-1] = bufSubstr(buf, int(a), int(b))
else: matches[i-1] = ""
return rawMatches[1] - rawMatches[0]
proc newMatchData(slots: int): ptr pcre2.MatchData =
  ## Allocates a match-data block with room for `slots` offset pairs.
  ##
  ## The request is clamped to at least one pair. If the library returns
  ## `nil`, the failure is reported through `raiseInvalidRegex`. The caller
  ## owns the returned block and must release it.
  let pairCount = max(slots, 1).uint32
  result = pcre2.match_data_create(pairCount, nil)
  if result.isNil:
    raiseInvalidRegex("could not allocate PCRE2 match data")
const MaxReBufSize* = high(cint)
## Maximum PCRE (API 1) buffer start/size equal to `high(cint)`, which even
## for 64-bit systems can be either 2`31`:sup:-1 or 2`63`:sup:-1.
template ovector(matchData: ptr pcre2.MatchData): ptr UncheckedArray[csize_t] =
  ## Views the offset vector of `matchData` as an unchecked array of
  ## `csize_t`. The callers in this module read it as consecutive
  ## (start, end) pairs, with pair 0 describing the whole match.
  ## No bounds checking is performed on the returned array.
  cast[ptr UncheckedArray[csize_t]](pcre2.get_ovector_pointer(matchData))
proc rawMatch(buf: cstring, pattern: Regex, start, bufSize: int,
              options: uint32, matchData: ptr pcre2.MatchData): cint =
  ## Thin wrapper around `pcre2.match` that runs `pattern` over the first
  ## `bufSize` bytes of `buf`, beginning the search at `start`, and returns
  ## the library's raw result code.
  ##
  ## Negative `start` or `bufSize` values are rejected up front with
  ## `ERROR_BADOFFSET`, because both are converted to the unsigned
  ## `csize_t` before being handed to the library.
  let invalidRange = start < 0 or bufSize < 0
  if invalidRange:
    result = pcre2.ERROR_BADOFFSET
  else:
    result = pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t,
                         start.csize_t, options, matchData, nil)
proc copyStringMatches(buf: cstring, rawMatches: ptr UncheckedArray[csize_t],
                       captureCount: int, matches: var openArray[string]) =
  ## Copies captured substrings out of `buf` into `matches`.
  ##
  ## `rawMatches` is the offset vector of a successful match, laid out as
  ## (start, end) pairs with pair 0 being the whole match. Capture group `i`
  ## (1-based) is written to `matches[i-1]`, and a group whose start offset
  ## is `pcre2.UNSET` becomes `""`. Entries of `matches` beyond the reported
  ## groups are left untouched.
  ##
  ## `captureCount` is the non-negative result of the match call. PCRE2
  ## returns 0 when the offset vector was too small to hold every group, but
  ## still fills in as many pairs as fit. In that case all `matches.len`
  ## groups are copied instead of none.
  ##
  ## Precondition: when `captureCount` is 0 the offset vector must hold at
  ## least `matches.len + 1` pairs, which is what callers get from
  ## `newMatchData(matches.len + 1)`.
  let pairCount =
    if captureCount == 0: matches.len + 1 # vector was full: use every pair
    else: captureCount
  # An empty range (upper bound below 1) simply performs no iterations.
  for i in 1 .. min(pairCount - 1, matches.len):
    let matchStart = rawMatches[i * 2]
    let matchEnd = rawMatches[i * 2 + 1]
    if matchStart != pcre2.UNSET:
      matches[i-1] = bufSubstr(buf, int(matchStart), int(matchEnd))
    else:
      matches[i-1] = ""
proc copyBoundsMatches(rawMatches: ptr UncheckedArray[csize_t],
                       captureCount: int,
                       matches: var openArray[tuple[first, last: int]]) =
  ## Converts capture offsets into inclusive `(first, last)` bounds.
  ##
  ## `rawMatches` is the offset vector of a successful match, laid out as
  ## (start, end) pairs with pair 0 being the whole match. Capture group `i`
  ## (1-based) is written to `matches[i-1]` with the exclusive end offset
  ## turned into an inclusive `last`. A group whose start offset is
  ## `pcre2.UNSET` becomes `(-1, 0)`. Entries of `matches` beyond the
  ## reported groups are left untouched.
  ##
  ## `captureCount` is the non-negative result of the match call. PCRE2
  ## returns 0 when the offset vector was too small to hold every group, but
  ## still fills in as many pairs as fit. In that case all `matches.len`
  ## groups are copied instead of none.
  ##
  ## Precondition: when `captureCount` is 0 the offset vector must hold at
  ## least `matches.len + 1` pairs, which is what callers get from
  ## `newMatchData(matches.len + 1)`.
  let pairCount =
    if captureCount == 0: matches.len + 1 # vector was full: use every pair
    else: captureCount
  # An empty range (upper bound below 1) simply performs no iterations.
  for i in 1 .. min(pairCount - 1, matches.len):
    let matchStart = rawMatches[i * 2]
    let matchEnd = rawMatches[i * 2 + 1]
    if matchStart != pcre2.UNSET:
      matches[i-1] = (int(matchStart), int(matchEnd) - 1)
    else:
      matches[i-1] = (-1, 0)
proc matchOrFind(buf: cstring, pattern: Regex, matches: var openArray[string],
                 start, bufSize: int, options: uint32): int =
  ## Runs `pattern` against `buf` with the given PCRE2 match `options` and
  ## stores the captured substrings in `matches`.
  ##
  ## Returns the length of the overall match (end offset minus start offset
  ## of pair 0) on success. On failure the negative result code of
  ## `rawMatch` is returned unchanged and `matches` is not written.
  ##
  ## NOTE(review): any negative code is passed through, not only the
  ## no-match code; callers that compare against -1 should confirm that is
  ## what they want.
  let matchData = newMatchData(matches.len + 1)
  defer: pcre2.match_data_free(matchData)
  let rc = rawMatch(buf, pattern, start, bufSize, options, matchData)
  if rc < 0:
    result = int(rc)
  else:
    let offsets = ovector(matchData)
    copyStringMatches(buf, offsets, int(rc), matches)
    result = int(offsets[1]) - int(offsets[0])
const MaxReBufSize* = high(int)
## Maximum PCRE2 buffer start/size accepted by this Nim API.
proc findBounds*(buf: cstring, pattern: Regex, matches: var openArray[string],
start = 0, bufSize: int): tuple[first, last: int] =
@@ -172,17 +213,12 @@ proc findBounds*(buf: cstring, pattern: Regex, matches: var openArray[string],
##
## Note: The memory for `matches` needs to be allocated before this function is
## called, otherwise it will just remain empty.
var
rtarray = initRtArray[cint]((matches.len+1)*3)
rawMatches = rtarray.getRawData
res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
cast[ptr cint](rawMatches), (matches.len+1).cint*3)
if res < 0'i32: return (-1, 0)
for i in 1..int(res)-1:
var a = rawMatches[i * 2]
var b = rawMatches[i * 2 + 1]
if a >= 0'i32: matches[i-1] = bufSubstr(buf, int(a), int(b))
else: matches[i-1] = ""
let matchData = newMatchData(matches.len + 1)
defer: pcre2.match_data_free(matchData)
let res = rawMatch(buf, pattern, start, bufSize, 0'u32, matchData)
let rawMatches = ovector(matchData)
if res < 0: return (-1, 0)
copyStringMatches(buf, rawMatches, int(res), matches)
return (rawMatches[0].int, rawMatches[1].int - 1)
proc findBounds*(s: string, pattern: Regex, matches: var openArray[string],
@@ -212,17 +248,12 @@ proc findBounds*(buf: cstring, pattern: Regex,
## `(-1,0)` is returned.
##
## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
var
rtarray = initRtArray[cint]((matches.len+1)*3)
rawMatches = rtarray.getRawData
res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
cast[ptr cint](rawMatches), (matches.len+1).cint*3)
if res < 0'i32: return (-1, 0)
for i in 1..int(res)-1:
var a = rawMatches[i * 2]
var b = rawMatches[i * 2 + 1]
if a >= 0'i32: matches[i-1] = (int(a), int(b)-1)
else: matches[i-1] = (-1,0)
let matchData = newMatchData(matches.len + 1)
defer: pcre2.match_data_free(matchData)
let res = rawMatch(buf, pattern, start, bufSize, 0'u32, matchData)
let rawMatches = ovector(matchData)
if res < 0: return (-1, 0)
copyBoundsMatches(rawMatches, int(res), matches)
return (rawMatches[0].int, rawMatches[1].int - 1)
proc findBounds*(s: string, pattern: Regex,
@@ -244,29 +275,28 @@ proc findBounds*(s: string, pattern: Regex,
min(start, MaxReBufSize), min(s.len, MaxReBufSize))
proc findBoundsImpl(buf: cstring, pattern: Regex,
start = 0, bufSize = 0, flags = 0): tuple[first, last: int] =
var rtarray = initRtArray[cint](3)
let rawMatches = rtarray.getRawData
let res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, flags.int32,
cast[ptr cint](rawMatches), 3)
if res < 0'i32:
start = 0, bufSize = 0,
options = 0'u32): tuple[first, last: int] =
let matchData = newMatchData(1)
defer: pcre2.match_data_free(matchData)
let res = rawMatch(buf, pattern, start, bufSize, options, matchData)
let rawMatches = ovector(matchData)
if res < 0:
result = (-1, 0)
else:
result = (int(rawMatches[0]), int(rawMatches[1]-1))
result = (int(rawMatches[0]), int(rawMatches[1]) - 1)
proc findBounds*(buf: cstring, pattern: Regex,
start = 0, bufSize: int): tuple[first, last: int] =
## returns the `first` and `last` position of `pattern` in `buf`,
## where `buf` has length `bufSize` (not necessarily `'\0'` terminated).
## If it does not match, `(-1,0)` is returned.
var
rtarray = initRtArray[cint](3)
rawMatches = rtarray.getRawData
res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
cast[ptr cint](rawMatches), 3)
if res < 0'i32: return (int(res), 0)
return (int(rawMatches[0]), int(rawMatches[1]-1))
let matchData = newMatchData(1)
defer: pcre2.match_data_free(matchData)
let res = rawMatch(buf, pattern, start, bufSize, 0'u32, matchData)
let rawMatches = ovector(matchData)
if res < 0: return (int(res), 0)
return (int(rawMatches[0]), int(rawMatches[1]) - 1)
proc findBounds*(s: string, pattern: Regex,
start = 0): tuple[first, last: int] {.inline.} =
@@ -279,14 +309,16 @@ proc findBounds*(s: string, pattern: Regex,
result = findBounds(cstring(s), pattern,
min(start, MaxReBufSize), min(s.len, MaxReBufSize))
proc matchOrFind(buf: cstring, pattern: Regex, start, bufSize: int, flags: cint): cint =
var
rtarray = initRtArray[cint](3)
rawMatches = rtarray.getRawData
result = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, flags,
cast[ptr cint](rawMatches), 3)
if result >= 0'i32:
result = rawMatches[1] - rawMatches[0]
proc matchOrFind(buf: cstring, pattern: Regex, start, bufSize: int,
                 options: uint32): int =
  ## Runs `pattern` against `buf` with the given PCRE2 match `options`,
  ## tracking only the overall match (a single offset pair is allocated).
  ##
  ## Returns the length of the match (end offset minus start offset) on
  ## success, or the negative result code of `rawMatch` unchanged on failure.
  let matchData = newMatchData(1)
  defer: pcre2.match_data_free(matchData)
  let rc = rawMatch(buf, pattern, start, bufSize, options, matchData)
  if rc < 0:
    return int(rc)
  let offsets = ovector(matchData)
  int(offsets[1]) - int(offsets[0])
proc matchLen*(s: string, pattern: Regex, matches: var openArray[string],
start = 0): int {.inline.} =
@@ -295,7 +327,7 @@ proc matchLen*(s: string, pattern: Regex, matches: var openArray[string],
## of zero can happen.
##
## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
result = matchOrFind(cstring(s), pattern, matches, start.cint, s.len.cint, pcre.ANCHORED)
result = matchOrFind(cstring(s), pattern, matches, start, s.len, cast[uint32](pcre2.ANCHORED))
proc matchLen*(buf: cstring, pattern: Regex, matches: var openArray[string],
start = 0, bufSize: int): int {.inline.} =
@@ -304,7 +336,7 @@ proc matchLen*(buf: cstring, pattern: Regex, matches: var openArray[string],
## of zero can happen.
##
## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
return matchOrFind(buf, pattern, matches, start.cint, bufSize.cint, pcre.ANCHORED)
return matchOrFind(buf, pattern, matches, start, bufSize, cast[uint32](pcre2.ANCHORED))
proc matchLen*(s: string, pattern: Regex, start = 0): int {.inline.} =
## the same as `match`, but it returns the length of the match,
@@ -315,13 +347,13 @@ proc matchLen*(s: string, pattern: Regex, start = 0): int {.inline.} =
doAssert matchLen("abcdefg", re"cde", 2) == 3
doAssert matchLen("abcdefg", re"abcde") == 5
doAssert matchLen("abcdefg", re"cde") == -1
result = matchOrFind(cstring(s), pattern, start.cint, s.len.cint, pcre.ANCHORED)
result = matchOrFind(cstring(s), pattern, start, s.len, cast[uint32](pcre2.ANCHORED))
proc matchLen*(buf: cstring, pattern: Regex, start = 0, bufSize: int): int {.inline.} =
  ## the same as `match`, but it returns the length of the match,
  ## if there is no match, `-1` is returned. Note that a match length
  ## of zero can happen.
  ##
  ## The match is attempted with the `pcre2.ANCHORED` option. Any negative
  ## value produced by `matchOrFind` is passed through unchanged.
  result = matchOrFind(buf, pattern, start, bufSize, cast[uint32](pcre2.ANCHORED))
proc match*(s: string, pattern: Regex, start = 0): bool {.inline.} =
## returns `true` if `s[start..]` matches the `pattern`.
@@ -361,18 +393,13 @@ proc find*(buf: cstring, pattern: Regex, matches: var openArray[string],
## `buf` has length `bufSize` (not necessarily `'\0'` terminated).
##
## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
var
rtarray = initRtArray[cint]((matches.len+1)*3)
rawMatches = rtarray.getRawData
res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
cast[ptr cint](rawMatches), (matches.len+1).cint*3)
if res < 0'i32: return res
for i in 1..int(res)-1:
var a = rawMatches[i * 2]
var b = rawMatches[i * 2 + 1]
if a >= 0'i32: matches[i-1] = bufSubstr(buf, int(a), int(b))
else: matches[i-1] = ""
return rawMatches[0]
let matchData = newMatchData(matches.len + 1)
defer: pcre2.match_data_free(matchData)
let res = rawMatch(buf, pattern, start, bufSize, 0'u32, matchData)
let rawMatches = ovector(matchData)
if res < 0: return int(res)
copyStringMatches(buf, rawMatches, int(res), matches)
return int(rawMatches[0])
proc find*(s: string, pattern: Regex, matches: var openArray[string],
start = 0): int {.inline.} =
@@ -387,13 +414,12 @@ proc find*(buf: cstring, pattern: Regex, start = 0, bufSize: int): int =
## returns the starting position of `pattern` in `buf`,
## where `buf` has length `bufSize` (not necessarily `'\0'` terminated).
## If it does not match, `-1` is returned.
var
rtarray = initRtArray[cint](3)
rawMatches = rtarray.getRawData
res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
cast[ptr cint](rawMatches), 3)
if res < 0'i32: return res
return rawMatches[0]
let matchData = newMatchData(1)
defer: pcre2.match_data_free(matchData)
let res = rawMatch(buf, pattern, start, bufSize, 0'u32, matchData)
let rawMatches = ovector(matchData)
if res < 0: return int(res)
return int(rawMatches[0])
proc find*(s: string, pattern: Regex, start = 0): int {.inline.} =
## returns the starting position of `pattern` in `s`. If it does not
@@ -413,40 +439,38 @@ iterator findAll*(s: string, pattern: Regex, start = 0): string =
##
## Note that since this is an iterator you should not modify the string you
## are iterating over: bad things could happen.
var
i = int32(start)
rtarray = initRtArray[cint](3)
rawMatches = rtarray.getRawData
var i = start
let matchData = newMatchData(1)
defer: pcre2.match_data_free(matchData)
while true:
let res = pcre.exec(pattern.h, pattern.e, s, len(s).cint, i, 0'i32,
cast[ptr cint](rawMatches), 3)
if res < 0'i32: break
let a = rawMatches[0]
let b = rawMatches[1]
if a == b and a == i: break
yield substr(s, int(a), int(b)-1)
i = b
let res = rawMatch(s.cstring, pattern, i, len(s), 0'u32, matchData)
if res < 0: break
let rawMatches = ovector(matchData)
let matchStart = rawMatches[0]
let matchEnd = rawMatches[1]
if matchStart == matchEnd and matchStart.int == i: break
yield substr(s, int(matchStart), int(matchEnd) - 1)
i = matchEnd.int
iterator findAll*(buf: cstring, pattern: Regex, start = 0, bufSize: int): string =
  ## Yields all matching `substrings` of `buf` that match `pattern`,
  ## where `buf` has length `bufSize`.
  ##
  ## Note that since this is an iterator you should not modify the buffer you
  ## are iterating over: bad things could happen.
  var i = start
  let matchData = newMatchData(1)
  defer: pcre2.match_data_free(matchData)
  while true:
    let res = rawMatch(buf, pattern, i, bufSize, 0'u32, matchData)
    if res < 0: break
    let rawMatches = ovector(matchData)
    let matchStart = int(rawMatches[0])
    let matchEnd = int(rawMatches[1])
    # An empty match at the current search position would never advance `i`.
    if matchStart == matchEnd and matchStart == i: break
    let size = matchEnd - matchStart
    var str = newString(size)
    # An empty match found past `i` yields "" (as `substr` does in the
    # `string` overload); `str[0]` must not be touched on an empty string.
    if size > 0:
      copyMem(addr str[0], unsafeAddr(buf[matchStart]), size)
    yield str
    i = matchEnd
proc findAll*(s: string, pattern: Regex, start = 0): seq[string] {.inline.} =
## returns all matching `substrings` of `s` that match `pattern`.
@@ -503,7 +527,7 @@ proc replace*(s: string, sub: Regex, by = ""): string =
doAssert "var1=key; var2=key2".replace(re"(\w+)=(\w+)", "?") == "?; ?"
result = ""
var prev = 0
var flags = int32(0)
var flags = 0'u32
while prev < s.len:
var match = findBoundsImpl(s.cstring, sub, prev, s.len, flags)
flags = 0
@@ -512,7 +536,7 @@ proc replace*(s: string, sub: Regex, by = ""): string =
add(result, by)
if match.first > match.last:
# 0-len match
flags = pcre.NOTEMPTY_ATSTART
flags = pcre2.NOTEMPTY_ATSTART.uint32
prev = match.last + 1
add(result, substr(s, prev))

260
lib/wrappers/pcre2.nim Normal file
View File

@@ -0,0 +1,260 @@
#
# Nim's Runtime Library
# (c) Copyright 2026 Nim Contributors
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Wrapper for the 8-bit PCRE2 API.
# Option bits occupying the three highest bits of a 32-bit option word,
# stored here as Nim `int` values.
when sizeof(int) == 4:
  # 0x80000000 does not fit in a signed 32-bit `int`; `low(int)` has the same
  # bit pattern (only the top bit set).
  const ANCHORED* = low(int)
else:
  const ANCHORED* = int(0x80000000)
const
  NO_UTF_CHECK* = int(0x40000000)
  ENDANCHORED* = int(0x20000000)
# Single-bit option flags (each value has exactly one bit set, so they can be
# OR-ed together). NOTE(review): intended to mirror the `PCRE2_*` compile
# option macros of `pcre2.h` -- verify against the installed header.
const
  ALLOW_EMPTY_CLASS* = 0x00000001
  ALT_BSUX* = 0x00000002
  AUTO_CALLOUT* = 0x00000004
  CASELESS* = 0x00000008
  DOLLAR_ENDONLY* = 0x00000010
  DOTALL* = 0x00000020
  DUPNAMES* = 0x00000040
  EXTENDED* = 0x00000080
  FIRSTLINE* = 0x00000100
  MATCH_UNSET_BACKREF* = 0x00000200
  MULTILINE* = 0x00000400
  NEVER_UCP* = 0x00000800
  NEVER_UTF* = 0x00001000
  NO_AUTO_CAPTURE* = 0x00002000
  NO_AUTO_POSSESS* = 0x00004000
  NO_DOTSTAR_ANCHOR* = 0x00008000
  NO_START_OPTIMIZE* = 0x00010000
  NO_START_OPTIMISE* = NO_START_OPTIMIZE # alternative spelling, same value
  UCP* = 0x00020000
  UNGREEDY* = 0x00040000
  UTF* = 0x00080000
  UTF8* = UTF # alias of `UTF`
  NEVER_BACKSLASH_C* = 0x00100000
  ALT_CIRCUMFLEX* = 0x00200000
  ALT_VERBNAMES* = 0x00400000
  USE_OFFSET_LIMIT* = 0x00800000
  EXTENDED_MORE* = 0x01000000
  LITERAL* = 0x02000000
  MATCH_INVALID_UTF* = 0x04000000
  ALT_EXTENDED_CLASS* = 0x08000000
  ## PCRE2 no longer exposes PCRE's `JAVASCRIPT_COMPAT` option. `ALT_BSUX`
  ## preserves the most important JavaScript-style escape handling.
  JAVASCRIPT_COMPAT* = ALT_BSUX
# Bit flags with a `JIT_` prefix. NOTE(review): presumably the `options`
# argument of `jit_compile` -- confirm against the PCRE2 JIT documentation.
const
  JIT_COMPLETE* = 0x00000001
  JIT_PARTIAL_SOFT* = 0x00000002
  JIT_PARTIAL_HARD* = 0x00000004
  JIT_INVALID_UTF* = 0x00000100
  JIT_TEST_ALLOC* = 0x00000200
# Single-bit option flags. NOTE(review): intended to mirror the `PCRE2_*`
# match-time option macros of `pcre2.h` (the `options` argument of `match`)
# -- verify against the installed header.
const
  NOTBOL* = 0x00000001
  NOTEOL* = 0x00000002
  NOTEMPTY* = 0x00000004
  NOTEMPTY_ATSTART* = 0x00000008
  PARTIAL_SOFT* = 0x00000010
  PARTIAL_HARD* = 0x00000020
  DFA_RESTART* = 0x00000040
  DFA_SHORTEST* = 0x00000080
  NO_JIT* = 0x00002000
  COPY_MATCHED_SUBJECT* = 0x00004000
  DISABLE_RECURSELOOP_CHECK* = 0x00040000
# Small enumerated values (not bit flags): newline conventions (`NEWLINE_*`)
# and `BSR_*` settings. NOTE(review): presumably the values reported for
# `INFO_NEWLINE`/`CONFIG_NEWLINE` and `INFO_BSR`/`CONFIG_BSR` -- confirm
# against `pcre2.h`.
const
  NEWLINE_CR* = 1
  NEWLINE_LF* = 2
  NEWLINE_CRLF* = 3
  NEWLINE_ANY* = 4
  NEWLINE_ANYCRLF* = 5
  NEWLINE_NUL* = 6
  BSR_UNICODE* = 1
  BSR_ANYCRLF* = 2
# Negative status codes. The numbering is not contiguous: several values
# (e.g. -24..-28, -38..-43, -50, -57..-62, -64, -65) have no constant here.
# NOTE(review): intended to mirror the `PCRE2_ERROR_*` macros of `pcre2.h`
# -- verify against the installed header.
const
  ERROR_NOMATCH* = -1
  ERROR_PARTIAL* = -2
  ERROR_UTF8_ERR1* = -3
  ERROR_UTF8_ERR2* = -4
  ERROR_UTF8_ERR3* = -5
  ERROR_UTF8_ERR4* = -6
  ERROR_UTF8_ERR5* = -7
  ERROR_UTF8_ERR6* = -8
  ERROR_UTF8_ERR7* = -9
  ERROR_UTF8_ERR8* = -10
  ERROR_UTF8_ERR9* = -11
  ERROR_UTF8_ERR10* = -12
  ERROR_UTF8_ERR11* = -13
  ERROR_UTF8_ERR12* = -14
  ERROR_UTF8_ERR13* = -15
  ERROR_UTF8_ERR14* = -16
  ERROR_UTF8_ERR15* = -17
  ERROR_UTF8_ERR16* = -18
  ERROR_UTF8_ERR17* = -19
  ERROR_UTF8_ERR18* = -20
  ERROR_UTF8_ERR19* = -21
  ERROR_UTF8_ERR20* = -22
  ERROR_UTF8_ERR21* = -23
  ERROR_BADDATA* = -29
  ERROR_MIXEDTABLES* = -30
  ERROR_BADMAGIC* = -31
  ERROR_BADMODE* = -32
  ERROR_BADOFFSET* = -33
  ERROR_BADOPTION* = -34
  ERROR_BADREPLACEMENT* = -35
  ERROR_BADUTFOFFSET* = -36
  ERROR_CALLOUT* = -37
  ERROR_INTERNAL* = -44
  ERROR_JIT_BADOPTION* = -45
  ERROR_JIT_STACKLIMIT* = -46
  ERROR_MATCHLIMIT* = -47
  ERROR_NOMEMORY* = -48
  ERROR_NOSUBSTRING* = -49
  ERROR_NULL* = -51
  ERROR_RECURSELOOP* = -52
  ERROR_DEPTHLIMIT* = -53
  ERROR_RECURSIONLIMIT* = ERROR_DEPTHLIMIT # alias, same value as ERROR_DEPTHLIMIT
  ERROR_UNAVAILABLE* = -54
  ERROR_UNSET* = -55
  ERROR_BADOFFSETLIMIT* = -56
  ERROR_HEAPLIMIT* = -63
  ERROR_DFA_UINVALID_UTF* = -66
  ERROR_INVALIDOFFSET* = -67
  ERROR_JIT_UNSUPPORTED* = -68
# Consecutive selector values 0..26. NOTE(review): presumably the `what`
# argument of `pattern_info` (`PCRE2_INFO_*` in `pcre2.h`) -- verify against
# the installed header.
const
  INFO_ALLOPTIONS* = 0
  INFO_ARGOPTIONS* = 1
  INFO_BACKREFMAX* = 2
  INFO_BSR* = 3
  INFO_CAPTURECOUNT* = 4
  INFO_FIRSTCODEUNIT* = 5
  INFO_FIRSTCODETYPE* = 6
  INFO_FIRSTBITMAP* = 7
  INFO_HASCRORLF* = 8
  INFO_JCHANGED* = 9
  INFO_JITSIZE* = 10
  INFO_LASTCODEUNIT* = 11
  INFO_LASTCODETYPE* = 12
  INFO_MATCHEMPTY* = 13
  INFO_MATCHLIMIT* = 14
  INFO_MAXLOOKBEHIND* = 15
  INFO_MINLENGTH* = 16
  INFO_NAMECOUNT* = 17
  INFO_NAMEENTRYSIZE* = 18
  INFO_NAMETABLE* = 19
  INFO_NEWLINE* = 20
  INFO_DEPTHLIMIT* = 21
  INFO_RECURSIONLIMIT* = INFO_DEPTHLIMIT # alias, same value as INFO_DEPTHLIMIT
  INFO_SIZE* = 22
  INFO_HASBACKSLASHC* = 23
  INFO_FRAMESIZE* = 24
  INFO_HEAPLIMIT* = 25
  INFO_EXTRAOPTIONS* = 26
# Consecutive selector values 0..15. NOTE(review): presumably the `what`
# argument of `config` (`PCRE2_CONFIG_*` in `pcre2.h`) -- verify against the
# installed header.
const
  CONFIG_BSR* = 0
  CONFIG_JIT* = 1
  CONFIG_JITTARGET* = 2
  CONFIG_LINKSIZE* = 3
  CONFIG_MATCHLIMIT* = 4
  CONFIG_NEWLINE* = 5
  CONFIG_PARENSLIMIT* = 6
  CONFIG_DEPTHLIMIT* = 7
  CONFIG_RECURSIONLIMIT* = CONFIG_DEPTHLIMIT # alias, same value as CONFIG_DEPTHLIMIT
  CONFIG_STACKRECURSE* = 8
  CONFIG_UNICODE* = 9
  CONFIG_UNICODE_VERSION* = 10
  CONFIG_VERSION* = 11
  CONFIG_HEAPLIMIT* = 12
  CONFIG_NEVER_BACKSLASH_C* = 13
  CONFIG_COMPILED_WIDTHS* = 14
  CONFIG_TABLES_LENGTH* = 15
# Both constants are `csize_t` values with every bit set (bitwise `not` of 0).
# NOTE(review): presumably `ZERO_TERMINATED` is the sentinel length for
# NUL-terminated input and `UNSET` marks an unset ovector offset -- confirm
# against the PCRE2 API documentation.
const
  ZERO_TERMINATED* = not 0.csize_t
  UNSET* = not 0.csize_t
# Field-less object types; the imported procs in this module only ever use
# them behind `ptr` (e.g. `ptr Pcre`, `ptr MatchData`), i.e. as opaque handles.
type
  Pcre* = object # returned by `compile`, released with `code_free`
  CompileContext* = object
  GeneralContext* = object
  MatchContext* = object
  MatchData* = object # returned by `match_data_create*`, released with `match_data_free`
  JitStack* = object
# Select how the C symbols are resolved: by default they are loaded from a
# dynamic library named per host OS; with `-d:usePcreHeader` they come from
# `<pcre2.h>` with the code unit width fixed to 8.
when not defined(usePcreHeader):
  when hostOS == "windows":
    const pcre2Dll = "pcre2-8.dll"
  elif hostOS == "macosx":
    const pcre2Dll = "libpcre2-8(.0|).dylib"
  else:
    const pcre2Dll = "libpcre2-8.so(.0|)"
  {.push dynlib: pcre2Dll.}
else:
  {.passC: "-DPCRE2_CODE_UNIT_WIDTH=8".}
  {.push header: "<pcre2.h>".}
# `$1` is replaced by the Nim proc name, so e.g. `compile` binds the C symbol
# `pcre2_compile_8` and `match_data_free` binds `pcre2_match_data_free_8`.
{.push cdecl, importc: "pcre2_$1_8".}
# Binds `pcre2_compile_8`; returns a `ptr Pcre` handle and reports failures
# through the `errorCode` / `errorOffset` out-pointers.
proc compile*(pattern: ptr uint8,
              length: csize_t,
              options: uint32,
              errorCode: ptr cint,
              errorOffset: ptr csize_t,
              context: ptr CompileContext): ptr Pcre
# Binds `pcre2_code_free_8`.
proc code_free*(code: ptr Pcre)
# Binds `pcre2_config_8`; `where` is an untyped output pointer.
proc config*(what: uint32,
             where: pointer): cint
# Binds `pcre2_get_error_message_8`; writes into the caller-supplied `buffer`.
proc get_error_message*(errorCode: cint,
                        buffer: ptr uint8,
                        bufferLength: csize_t): cint
# Binds `pcre2_match_8`; the `cint` result is a status code (callers in
# `re.nim` treat negative values as failure).
proc match*(code: ptr Pcre,
            subject: ptr uint8,
            length: csize_t,
            startOffset: csize_t,
            options: uint32,
            matchData: ptr MatchData,
            context: ptr MatchContext): cint
# Binds `pcre2_match_data_create_8`.
proc match_data_create*(oveccount: uint32,
                        context: ptr GeneralContext): ptr MatchData
# Binds `pcre2_match_data_create_from_pattern_8`.
proc match_data_create_from_pattern*(code: ptr Pcre,
                                     context: ptr GeneralContext): ptr MatchData
# Binds `pcre2_match_data_free_8`.
proc match_data_free*(matchData: ptr MatchData)
# Binds `pcre2_get_ovector_pointer_8`; returns a pointer to `csize_t` offsets.
proc get_ovector_pointer*(matchData: ptr MatchData): ptr csize_t
# Binds `pcre2_get_ovector_count_8`.
proc get_ovector_count*(matchData: ptr MatchData): uint32
# Binds `pcre2_pattern_info_8`; `where` is an untyped output pointer.
proc pattern_info*(code: ptr Pcre,
                   what: uint32,
                   where: pointer): cint
# Binds `pcre2_jit_compile_8`.
proc jit_compile*(code: ptr Pcre,
                  options: uint32): cint
# Binds `pcre2_jit_free_unused_memory_8`.
proc jit_free_unused_memory*()
# Close the `cdecl, importc` push, then the `dynlib` / `header` push.
{.pop.}
{.pop.}

View File

@@ -110,7 +110,7 @@ image: freebsd/latest
packages:
- databases/sqlite3
- devel/boehm-gc-threaded
- devel/pcre
- devel/pcre2
- devel/sdl20
- devel/sfml
- www/node
@@ -124,7 +124,7 @@ packages:
- sqlite3
- node
- boehm-gc
- pcre
- pcre2
- sfml
- sdl2
- libffi

View File

@@ -126,6 +126,7 @@ mm.md
withoutIndex = """
lib/wrappers/tinyc.nim
lib/wrappers/pcre.nim
lib/wrappers/pcre2.nim
lib/wrappers/openssl.nim
lib/posix/posix.nim
lib/posix/linux.nim

View File

@@ -729,7 +729,7 @@ iterator searchFile(pattern: Pattern; buffer: string): Output =
i = t.last+1
when typeof(pattern) is Regex:
if buffer.len > MaxReBufSize:
yield Output(kind: openError, msg: "PCRE size limit is " & $MaxReBufSize)
yield Output(kind: openError, msg: "PCRE2 size limit is " & $MaxReBufSize)
func detectBin(buffer: string): bool =
for i in 0 ..< min(1024, buffer.len):