test pcre2

2026-05-28 15:55:14 +00:00 · 2026-05-27 19:34:03 +08:00
parent 8771451701
commit b85c6bc9ec
10 changed files with 570 additions and 265 deletions
--- a/.github/workflows/ci_packages.yml
+++ b/.github/workflows/ci_packages.yml
@@ -49,7 +49,7 @@ jobs:
          DEBIAN_FRONTEND='noninteractive' \
            sudo apt-get install --no-install-recommends -yq \
              libcurl4-openssl-dev libgc-dev libsdl1.2-dev libsfml-dev \
-              valgrind libc6-dbg libblas-dev liblapack-dev libpcre3 xorg-dev
+              valgrind libc6-dbg libblas-dev liblapack-dev libpcre2-dev xorg-dev
      - name: 'Install dependencies (macOS)'
        if: runner.os == 'macOS'
        run: brew install boehmgc make sfml gtk+3
--- a/changelog.md
+++ b/changelog.md
@@ -78,8 +78,8 @@ parameter and result types, not just their source-level shape. Use
 - `min`, `max`, and `sequtils`' `minIndex`, `maxIndex` and `minmax` for `openArray`s now accept a comparison function.
 - `system.substr` implementation now uses `copymem` (wrapped C `memcpy`) for copying data, if available at compilation.
 - `system.newStringUninit` is now considered free of side-effects allowing it to be used with `--experimental:strictFuncs`.
- `std/re` and `std/nre` are deprecated as PCRE library is obsolete.
-  Use https://github.com/nitely/nim-regex or `std/nre2`.
+- `std/re` and `std/nre` now use PCRE2. They remain deprecated;
+  use https://github.com/nitely/nim-regex or `std/nre2`.
  See: https://github.com/nim-lang/Nim/issues/23668.
 - `std/pegs` now correctly lexes UTF-8 bytes inside bare identifier-style
  terminals, so case-insensitive matching of non-ASCII terms (e.g. ``\i café``)
--- a/doc/lib.md
+++ b/doc/lib.md
@@ -596,12 +596,12 @@ Regular expressions

 * [re](re.html)
  Procedures and operators for handling regular
-  expressions. The current implementation uses PCRE.
+  expressions. The current implementation uses PCRE2.

 * [nre](nre.html)

  Many help functions for handling regular expressions.
-  The current implementation uses PCRE.
+  The current implementation uses PCRE2.

 Database support
 ----------------
@@ -661,6 +661,9 @@ Regular expressions
 * [pcre](pcre.html)
  Wrapper for the PCRE library.

+* [pcre2](pcre2.html)
+  Wrapper for the PCRE2 library.
+

 Database support
 ----------------
--- a/doc/nimgrep.md
+++ b/doc/nimgrep.md
@@ -86,7 +86,7 @@ That means you can always use only 1 such an option with logical OR, e.g.
 Meaning of `^`:literal: and `$`:literal:
 ========================================

-`nimgrep`:cmd: PCRE engine is run in a single-line mode so
+`nimgrep`:cmd: PCRE2 engine is run in a single-line mode so
 `^`:literal: matches the beginning of whole input *file* and
 `$`:literal: matches the end of *file* (or whole input *string* for
 options like `--filename`).
@@ -97,7 +97,7 @@ Add the `(?m)`:literal: modifier to the beginning of your pattern for
 Examples
 ========

-All examples below use default PCRE Regex patterns:
+All examples below use default PCRE2 Regex patterns:

 + To search recursively in Nim files using style-insensitive identifiers:

--- a/lib/impure/nre.nim
+++ b/lib/impure/nre.nim
@@ -7,21 +7,21 @@
 #

 when defined(js):
-  {.error: "This library needs to be compiled with a c-like backend, and depends on PCRE; See jsre for JS backend.".}
+  {.error: "This library needs to be compiled with a c-like backend, and depends on PCRE2; See jsre for JS backend.".}

 ## .. warning:: NRE is deprecated.
 ##   Use [Regex](https://github.com/nitely/nim-regex) or
 ##   `NRE2 <nre2.html>`_ that wraps Regex so that you can easily replace NRE.
-##   PCRE library is now at end of life.
+##   This compatibility module uses PCRE2.
 ##
 ## What is NRE?
 ## ============
 ##
-## A regular expression library for Nim using PCRE to do the hard work.
+## A regular expression library for Nim using PCRE2 to do the hard work.
 ##
-## For documentation on how to write patterns, there exists `the official PCRE
+## For documentation on how to write patterns, there exists `the official PCRE2
 ## pattern documentation
-## <https://www.pcre.org/original/doc/html/pcrepattern.html>`_. You can also
+## <https://www.pcre.org/current/doc/html/pcre2pattern.html>`_. You can also
 ## search the internet for a wide variety of third-party documentation and
 ## tools.
 ##
@@ -39,10 +39,8 @@ runnableExamples:
 ## Licencing
 ## ---------
 ##
-## PCRE has `some additional terms`_ that you must agree to in order to use
-## this module.
+## PCRE2 is distributed under a BSD-style licence.
 ##
-## .. _`some additional terms`: https://pcre.sourceforge.net/license.txt
 runnableExamples:
  import std/sugar
  let vowels = re"[aeoui]"
@@ -66,7 +64,7 @@ runnableExamples:
  assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
  assert find("uxabc", re"ab", start = 3).isNone

-from std/pcre import nil
+import std/pcre2 as pcre
 import nre/private/util
 import std/tables
 from std/strutils import `%`
@@ -82,7 +80,6 @@ type
  RegexDesc* = object
    pattern*: string
    pcreObj: ptr pcre.Pcre  ## not nil
-    pcreExtra: ptr pcre.ExtraData  ## nil

    captureNameToId: Table[string, int]

@@ -93,9 +90,9 @@ type
    ##
    ## `pattern: string`
    ## :   the string that was used to create the pattern. For details on how
-    ##     to write a pattern, please see `the official PCRE pattern
+    ##     to write a pattern, please see `the official PCRE2 pattern
    ##     documentation.
-    ##     <https://www.pcre.org/original/doc/html/pcrepattern.html>`_
+    ##     <https://www.pcre.org/current/doc/html/pcre2pattern.html>`_
    ##
    ## `captureCount: int`
    ## :   the number of captures that the pattern has.
@@ -140,23 +137,23 @@ type
    ##     NEL (next line, U+0085), LS (line separator, U+2028), and PS
    ##     (paragraph separator, U+2029). For the 8-bit library, the last two
    ##     are recognized only in UTF-8 mode.
-    ##     —  man pcre
+    ##     -- man pcre2pattern
    ##
    ## -  `(*JAVASCRIPT_COMPAT)` - JavaScript compatibility
    ## -  `(*NO_STUDY)` - turn off studying; study is enabled by default
    ##
    ## For more details on the leading option groups, see the `Option
-    ## Setting <https://man7.org/linux/man-pages/man3/pcresyntax.3.html#OPTION_SETTING>`_
+    ## Setting <https://www.pcre.org/current/doc/html/pcre2syntax.html#SEC16>`_
    ## and the `Newline
-    ## Convention <https://man7.org/linux/man-pages/man3/pcresyntax.3.html#NEWLINE_CONVENTION>`_
-    ## sections of the `PCRE syntax
-    ## manual <https://man7.org/linux/man-pages/man3/pcresyntax.3.html>`_.
+    ## Convention <https://www.pcre.org/current/doc/html/pcre2syntax.html#SEC17>`_
+    ## sections of the `PCRE2 syntax
+    ## manual <https://www.pcre.org/current/doc/html/pcre2syntax.html>`_.
    ##
-    ## Some of these options are not part of PCRE and are converted by nre
-    ## into PCRE flags. These include `NEVER_UTF`, `ANCHORED`,
+    ## Some of these options are not part of a pattern and are converted by nre
+    ## into PCRE2 flags. These include `NEVER_UTF`, `ANCHORED`,
    ## `DOLLAR_ENDONLY`, `FIRSTLINE`, `NO_AUTO_CAPTURE`,
-    ## `JAVASCRIPT_COMPAT`, `U`, `NO_STUDY`. In other PCRE wrappers, you
-    ## will need to pass these as separate flags to PCRE.
+    ## `JAVASCRIPT_COMPAT`, `U`, `NO_STUDY`. In other PCRE2 wrappers, you
+    ## will need to pass these as separate flags to PCRE2.

  RegexMatch* = object
    ## Usually seen as `Option[RegexMatch]`, it represents the result of an
@@ -196,7 +193,7 @@ type
    pattern*: Regex  ## The regex doing the matching.
                     ## Not nil.
    str*: string  ## The string that was matched against.
-    pcreMatchBounds: seq[HSlice[cint, cint]] ## First item is the bounds of the match
+    pcreMatchBounds: seq[HSlice[csize_t, csize_t]] ## First item is the bounds of the match
                                            ## Other items are the captures
                                            ## `a` is inclusive start, `b` is exclusive end

@@ -227,38 +224,32 @@ when defined(gcDestructors):
  when defined(nimAllowNonVarDestructor) and defined(nimPreviewNonVarDestructor):
    proc `=destroy`(pattern: RegexDesc) =
      `=destroy`(pattern.pattern)
-      pcre.free_substring(cast[cstring](pattern.pcreObj))
-      if pattern.pcreExtra != nil:
-        pcre.free_study(pattern.pcreExtra)
+      pcre.code_free(pattern.pcreObj)
      `=destroy`(pattern.captureNameToId)
  else:
    proc `=destroy`(pattern: var RegexDesc) =
      `=destroy`(pattern.pattern)
-      pcre.free_substring(cast[cstring](pattern.pcreObj))
-      if pattern.pcreExtra != nil:
-        pcre.free_study(pattern.pcreExtra)
+      pcre.code_free(pattern.pcreObj)
      `=destroy`(pattern.captureNameToId)
 else:
  proc destroyRegex(pattern: Regex) =
    `=destroy`(pattern.pattern)
-    pcre.free_substring(cast[cstring](pattern.pcreObj))
-    if pattern.pcreExtra != nil:
-      pcre.free_study(pattern.pcreExtra)
+    pcre.code_free(pattern.pcreObj)
    `=destroy`(pattern.captureNameToId)

-proc getinfo[T](pattern: Regex, opt: cint): T =
+proc getinfo[T](pattern: Regex, opt: uint32): T =
  result = default(T)
-  let retcode = pcre.fullinfo(pattern.pcreObj, pattern.pcreExtra, opt, addr result)
+  let retcode = pcre.pattern_info(pattern.pcreObj, opt, addr result)

  if retcode < 0:
    # XXX Error message that doesn't expose implementation details
    raise newException(FieldDefect, "Invalid getinfo for $1, errno $2" % [$opt, $retcode])

 proc getNameToNumberTable(pattern: Regex): Table[string, int] =
-  let entryCount = getinfo[cint](pattern, pcre.INFO_NAMECOUNT)
-  let entrySize = getinfo[cint](pattern, pcre.INFO_NAMEENTRYSIZE)
+  let entryCount = getinfo[uint32](pattern, pcre.INFO_NAMECOUNT).int
+  let entrySize = getinfo[uint32](pattern, pcre.INFO_NAMEENTRYSIZE).int
  let table = cast[ptr UncheckedArray[uint8]](
-                getinfo[int](pattern, pcre.INFO_NAMETABLE))
+                getinfo[pointer](pattern, pcre.INFO_NAMETABLE))

  result = initTable[string, int]()

@@ -274,61 +265,69 @@ proc getNameToNumberTable(pattern: Regex): Table[string, int] =

    result[name] = num

-proc initRegex(pattern: string, flags: int, study = true): Regex =
+proc pcreErrorMessage(errorCode: cint): string =
+  ## Message for a PCRE2 `errorCode`; falls back to the number if lookup fails.
+  var text: array[256, uint8]
+  let written = pcre.get_error_message(errorCode, addr text[0], text.len.csize_t)
+  if written < 0:
+    return $errorCode
+  result = newString(written)
+  if written > 0:
+    copyMem(addr result[0], addr text[0], written)
+
+proc jitCompile(pattern: ptr pcre.Pcre) =
+  var hasJit: cint = 0
+  if pcre.config(pcre.CONFIG_JIT, addr hasJit) == 0 and hasJit == 1:
+    discard pcre.jit_compile(pattern, pcre.JIT_COMPLETE.uint32)
+
+proc initRegex(pattern: string, flags: uint32, study = true): Regex =
  when defined(gcDestructors):
    result = Regex()
  else:
    new(result, destroyRegex)
  result.pattern = pattern

-  var errorMsg: cstring = ""
-  var errOffset: cint = 0
+  var
+    errorCode: cint = 0
+    errOffset: csize_t = 0

-  result.pcreObj = pcre.compile(cstring(pattern),
-                                # better hope int is at least 4 bytes..
-                                cint(flags), addr errorMsg,
+  result.pcreObj = pcre.compile(cast[ptr uint8](cstring(pattern)),
+                                pattern.len.csize_t, flags, addr errorCode,
                                addr errOffset, nil)
  if result.pcreObj == nil:
    # failed to compile
-    raise SyntaxError(msg: $errorMsg, pos: errOffset, pattern: pattern)
+    raise SyntaxError(msg: pcreErrorMessage(errorCode), pos: errOffset.int,
+                      pattern: pattern)

  if study:
-    var options: cint = 0
-    var hasJit: cint = cint(0)
-    if pcre.config(pcre.CONFIG_JIT, addr hasJit) == 0:
-      if hasJit == 1'i32:
-        options = pcre.STUDY_JIT_COMPILE
-    result.pcreExtra = pcre.study(result.pcreObj, options, addr errorMsg)
-    if errorMsg != nil:
-      raise StudyError(msg: $errorMsg)
+    jitCompile(result.pcreObj)

  result.captureNameToId = result.getNameToNumberTable()

 proc captureCount*(pattern: Regex): int =
-  return getinfo[cint](pattern, pcre.INFO_CAPTURECOUNT)
+  return getinfo[uint32](pattern, pcre.INFO_CAPTURECOUNT).int

 proc captureNameId*(pattern: Regex): Table[string, int] =
  return pattern.captureNameToId

 proc matchesCrLf(pattern: Regex): bool =
-  let flags = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS))
-  let newlineFlags = flags and (pcre.NEWLINE_CRLF or
-                                pcre.NEWLINE_ANY or
-                                pcre.NEWLINE_ANYCRLF)
-  if newlineFlags > 0u32:
+  let newline = getinfo[uint32](pattern, pcre.INFO_NEWLINE)
+  case newline
+  of pcre.NEWLINE_CRLF, pcre.NEWLINE_ANY, pcre.NEWLINE_ANYCRLF:
    return true
+  of pcre.NEWLINE_CR, pcre.NEWLINE_LF, pcre.NEWLINE_NUL:
+    return false
+  else:
+    discard

  # get flags from build config
-  var confFlags: cint = cint(0)
+  var confFlags: uint32 = 0
  if pcre.config(pcre.CONFIG_NEWLINE, addr confFlags) != 0:
    assert(false, "CONFIG_NEWLINE apparently got screwed up")

  case confFlags
-  of 13: return false
-  of 10: return false
-  of (13 shl 8) or 10: return true
-  of -2: return true
-  of -1: return true
+  of pcre.NEWLINE_CR, pcre.NEWLINE_LF, pcre.NEWLINE_NUL: return false
+  of pcre.NEWLINE_CRLF, pcre.NEWLINE_ANY, pcre.NEWLINE_ANYCRLF: return true
  else: return false


@@ -338,7 +337,9 @@ func captures*(pattern: RegexMatch): Captures = return Captures(pattern)

 func contains*(pattern: CaptureBounds, i: int): bool =
  let pattern = RegexMatch(pattern)
-  pattern.pcreMatchBounds[i + 1].a != -1
+  let index = i + 1
+  index >= 0 and index < pattern.pcreMatchBounds.len and
+    pattern.pcreMatchBounds[index].a != pcre.UNSET

 func contains*(pattern: Captures, i: int): bool =
  i in CaptureBounds(pattern)
@@ -349,7 +350,7 @@ func `[]`*(pattern: CaptureBounds, i: int): HSlice[int, int] =
    raise newException(IndexDefect, "Group '" & $i & "' was not captured")

  let bounds = pattern.pcreMatchBounds[i + 1]
-  int(bounds.a)..int(bounds.b-1)
+  int(bounds.a) .. (int(bounds.b) - 1)

 func `[]`*(pattern: Captures, i: int): string =
  let pattern = RegexMatch(pattern)
@@ -437,8 +438,7 @@ proc `$`*(pattern: RegexMatch): string =
 proc `==`*(a, b: Regex): bool =
  if not a.isNil and not b.isNil:
    return a.pattern == b.pattern and
-           a.pcreObj == b.pcreObj and
-           a.pcreExtra == b.pcreExtra
+           a.pcreObj == b.pcreObj
  else:
    return system.`==`(a, b)

@@ -453,7 +453,7 @@ const PcreOptions = {
  "FIRSTLINE": pcre.FIRSTLINE,
  "NO_AUTO_CAPTURE": pcre.NO_AUTO_CAPTURE,
  "JAVASCRIPT_COMPAT": pcre.JAVASCRIPT_COMPAT,
-  "U": pcre.UTF8 or pcre.UCP
+  "U": pcre.UTF or pcre.UCP
 }.toTable

 # Options that are supported inside regular expressions themselves
@@ -503,46 +503,63 @@ proc extractOptions(pattern: string): tuple[pattern: string, flags: int, study:

 proc re*(pattern: string): Regex =
  let (pattern, flags, study) = extractOptions(pattern)
-  initRegex(pattern, flags, study)
+  initRegex(pattern, cast[uint32](flags), study)

-proc matchImpl(str: string, pattern: Regex, start, endpos: int, flags: int): Option[RegexMatch] =
+func isInvalidUnicodeError(errorCode: cint): bool =
+  (errorCode <= pcre.ERROR_UTF8_ERR1 and errorCode >= pcre.ERROR_UTF8_ERR21) or
+    errorCode == pcre.ERROR_BADUTFOFFSET or
+    errorCode == pcre.ERROR_DFA_UINVALID_UTF
+
+proc newMatchData(pattern: Regex): ptr pcre.MatchData =
+  result = pcre.match_data_create_from_pattern(pattern.pcreObj, nil)
+  if result == nil:
+    raise RegexInternalError(msg: "could not allocate PCRE2 match data")
+
+proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32): Option[RegexMatch] =
  var myResult = RegexMatch(pattern: pattern, str: str)
-  # See PCRE man pages.
-  # 2x capture count to make room for start-end pairs
-  # 1x capture count as slack space for PCRE
-  let vecsize = (pattern.captureCount() + 1) * 3
-  # div 2 because each element is 2 cints long
-  # plus 1 because we need the ceiling, not the floor
-  myResult.pcreMatchBounds = newSeq[HSlice[cint, cint]]((vecsize + 1) div 2)
-  myResult.pcreMatchBounds.setLen(vecsize div 3)
+  myResult.pcreMatchBounds = newSeq[HSlice[csize_t, csize_t]](pattern.captureCount() + 1)

  let strlen = if endpos == int.high: str.len else: endpos+1
  doAssert(strlen <= str.len)  # don't want buffer overflows
+  if start < 0 or start > strlen:
+    return none(RegexMatch)
+
+  let matchData = newMatchData(pattern)
+  defer: pcre.match_data_free(matchData)
+  let execRet = pcre.match(pattern.pcreObj,
+                           cast[ptr uint8](cstring(str)),
+                           strlen.csize_t,
+                           start.csize_t,
+                           options,
+                           matchData,
+                           nil)
+  let rawMatches = cast[ptr UncheckedArray[csize_t]](pcre.get_ovector_pointer(matchData))
+  let ovectorCount = min(myResult.pcreMatchBounds.len,
+                         pcre.get_ovector_count(matchData).int)
+  for i in 0 ..< ovectorCount:
+    myResult.pcreMatchBounds[i] = rawMatches[i * 2] .. rawMatches[i * 2 + 1]

-  let execRet = pcre.exec(pattern.pcreObj,
-                          pattern.pcreExtra,
-                          cstring(str),
-                          cint(strlen),
-                          cint(start),
-                          cint(flags),
-                          cast[ptr cint](addr myResult.pcreMatchBounds[0]),
-                          cint(vecsize))
  if execRet >= 0:
    return some(myResult)

-  case execRet:
-    of pcre.ERROR_NOMATCH:
-      return none(RegexMatch)
-    of pcre.ERROR_NULL:
-      raise newException(AccessViolationDefect, "Expected non-null parameters")
-    of pcre.ERROR_BADOPTION:
-      raise RegexInternalError(msg: "Unknown pattern flag. Either a bug or " &
-        "outdated PCRE.")
-    of pcre.ERROR_BADUTF8, pcre.ERROR_SHORTUTF8, pcre.ERROR_BADUTF8_OFFSET:
-      raise InvalidUnicodeError(msg: "Invalid unicode byte sequence",
-        pos: myResult.pcreMatchBounds[0].a)
+  if isInvalidUnicodeError(execRet):
+    let errorPos = if myResult.pcreMatchBounds.len > 0 and
+        myResult.pcreMatchBounds[0].a != pcre.UNSET:
+      myResult.pcreMatchBounds[0].a.int
    else:
-      raise RegexInternalError(msg: "Unknown internal error: " & $execRet)
+      start
+    raise InvalidUnicodeError(msg: "Invalid unicode byte sequence", pos: errorPos)
+
+  case execRet
+  of pcre.ERROR_NOMATCH:
+    return none(RegexMatch)
+  of pcre.ERROR_NULL:
+    raise newException(AccessViolationDefect, "Expected non-null parameters")
+  of pcre.ERROR_BADOPTION:
+    raise RegexInternalError(msg: "Unknown pattern flag. Either a bug or " &
+      "outdated PCRE2.")
+  else:
+    raise RegexInternalError(msg: "Unknown internal error: " & $execRet)

 proc match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] =
  ## Like `find(...)<#find,string,Regex,int>`_, but anchored to the start of the
@@ -559,7 +576,7 @@ proc match*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[R
    assert 0 in "abc".match(re"(\w)").get.captureBounds
    assert "abc".match(re"").get.captureBounds[-1] == 0 .. -1
    assert "abc".match(re"abc").get.captureBounds[-1] == 0 .. 2
-  return str.matchImpl(pattern, start, endpos, pcre.ANCHORED)
+  return str.matchImpl(pattern, start, endpos, cast[uint32](pcre.ANCHORED))

 iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): RegexMatch =
  ## Works the same as `find(...)<#find,string,Regex,int>`_, but finds every
@@ -573,21 +590,21 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R
  ## Variants:
  ##
  ## -  `proc findAll(...)` returns a `seq[string]`
-  # see pcredemo for explanation => https://www.pcre.org/original/doc/html/pcredemo.html
+  # see pcre2demo for explanation => https://www.pcre.org/current/doc/html/pcre2demo.html
  let matchesCrLf = pattern.matchesCrLf()
-  let unicode = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS) and
-    pcre.UTF8) > 0u32
+  let unicode = uint32(getinfo[uint32](pattern, pcre.INFO_ALLOPTIONS) and
+    pcre.UTF.uint32) > 0u32
  let strlen = if endpos == int.high: str.len else: endpos+1
  var offset = start
  var match: Option[RegexMatch] = default(Option[RegexMatch])
  var neverMatched = true

  while true:
-    var flags = 0
+    var flags = 0'u32
    if match.isSome and
       match.get.matchBounds.a > match.get.matchBounds.b:
      # 0-len match
-      flags = pcre.NOTEMPTY_ATSTART
+      flags = pcre.NOTEMPTY_ATSTART.uint32
    match = str.matchImpl(pattern, offset, endpos, flags)

    if match.isNone:
@@ -623,7 +640,7 @@ proc find*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[Re
  ## `endpos`
  ## :   The maximum index for a match; `int.high` means the end of the
  ##     string, otherwise it’s an inclusive upper bound.
-  return str.matchImpl(pattern, start, endpos, 0)
+  return str.matchImpl(pattern, start, endpos, 0'u32)

 proc findAll*(str: string, pattern: Regex, start = 0, endpos = int.high): seq[string] =
  result = @[]
--- a/lib/impure/re.nim
+++ b/lib/impure/re.nim
@@ -8,27 +8,25 @@
 #

 when defined(js):
-  {.error: "This library needs to be compiled with a c-like backend, and depends on PCRE; See jsre for JS backend.".}
+  {.error: "This library needs to be compiled with a c-like backend, and depends on PCRE2; See jsre for JS backend.".}

 ## .. warning:: This module is deprecated.
 ##   Use [Regex](https://github.com/nitely/nim-regex).
-##   PCRE library is now at end of life.
+##   This compatibility module uses PCRE2.
 ##
 ## Regular expression support for Nim.
 ##
 ## This module is implemented by providing a wrapper around the
-## `PCRE (Perl-Compatible Regular Expressions) <https://www.pcre.org>`_
-## C library. This means that your application will depend on the PCRE
+## `PCRE2 (Perl-Compatible Regular Expressions) <https://www.pcre.org>`_
+## C library. This means that your application will depend on the PCRE2
 ## library's licence when using this module, which should not be a problem
 ## though.
 ##
 ## .. note:: There are also alternative nimble packages such as [tinyre](https://github.com/khchen/tinyre)
 ##   and [regex](https://github.com/nitely/nim-regex).
 ##
-## PCRE's licence follows:
-##
-## .. include:: ../../doc/regexprs.txt
-##
+## PCRE2 is distributed under a BSD-style licence.
+

 runnableExamples:
  ## Unless specified otherwise, `start` parameter in each proc indicates
@@ -40,7 +38,7 @@ runnableExamples:
    # can't match start of string since we're starting at 1

 import
-  std/[pcre, strutils, rtarrays]
+  std/[pcre2, strutils]

 when defined(nimPreviewSlimSystem):
  import std/syncio
@@ -60,8 +58,7 @@ type
                        ## expression will be used only once)

  RegexDesc = object
-    h: ptr Pcre
-    e: ptr ExtraData
+    h: ptr pcre2.Pcre

  Regex* = ref RegexDesc ## a compiled regular expression

@@ -71,14 +68,10 @@ type
 when defined(gcDestructors):
  when defined(nimAllowNonVarDestructor):
    proc `=destroy`(x: RegexDesc) =
-      pcre.free_substring(cast[cstring](x.h))
-      if not isNil(x.e):
-        pcre.free_study(x.e)
+      pcre2.code_free(x.h)
  else:
    proc `=destroy`(x: var RegexDesc) =
-      pcre.free_substring(cast[cstring](x.h))
-      if not isNil(x.e):
-        pcre.free_study(x.e)
+      pcre2.code_free(x.h)

 proc raiseInvalidRegex(msg: string) {.noinline, noreturn.} =
  var e: ref RegexError
@@ -86,21 +79,43 @@ proc raiseInvalidRegex(msg: string) {.noinline, noreturn.} =
  e.msg = msg
  raise e

-proc rawCompile(pattern: string, flags: cint): ptr Pcre =
+proc pcre2ErrorMessage(errorCode: cint): string =
+  ## Message for a PCRE2 `errorCode`; falls back to the number if lookup fails.
+  var text: array[256, uint8]
+  let written = pcre2.get_error_message(errorCode, addr text[0], text.len.csize_t)
+  if written < 0:
+    return $errorCode
+  result = newString(written)
+  if written > 0:
+    copyMem(addr result[0], addr text[0], written)
+
+proc rawCompile(pattern: string, options: uint32): ptr pcre2.Pcre =
  var
-    msg: cstring = ""
-    offset: cint = 0
-  result = pcre.compile(pattern, flags, addr(msg), addr(offset), nil)
+    errorCode: cint = 0
+    offset: csize_t = 0
+  result = pcre2.compile(cast[ptr uint8](pattern.cstring), pattern.len.csize_t,
+                         options, addr errorCode, addr offset, nil)
  if result == nil:
-    raiseInvalidRegex($msg & "\n" & pattern & "\n" & spaces(offset) & "^\n")
+    raiseInvalidRegex(pcre2ErrorMessage(errorCode) & "\n" & pattern & "\n" &
+                      spaces(offset.int) & "^\n")

 proc finalizeRegEx(x: Regex) =
-  # XXX This is a hack, but PCRE does not export its "free" function properly.
-  # Sigh. The hack relies on PCRE's implementation (see `pcre_get.c`).
-  # Fortunately the implementation is unlikely to change.
-  pcre.free_substring(cast[cstring](x.h))
-  if not isNil(x.e):
-    pcre.free_study(x.e)
+  pcre2.code_free(x.h)
+
+func toPcre2Options(flags: set[RegexFlag]): uint32 =
+  ## ORs together the PCRE2 compile option bit of each flag handled below;
+  ## every other member of `flags` (such as `reStudy`) contributes nothing.
+  for (flag, bit) in [(reIgnoreCase, pcre2.CASELESS.uint32),
+                      (reMultiLine, pcre2.MULTILINE.uint32),
+                      (reDotAll, pcre2.DOTALL.uint32),
+                      (reExtended, pcre2.EXTENDED.uint32)]:
+    if flag in flags:
+      result = result or bit
+
+proc jitCompile(pattern: ptr pcre2.Pcre) =
+  var hasJit: cint = 0
+  if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0 and hasJit == 1:
+    discard pcre2.jit_compile(pattern, pcre2.JIT_COMPLETE.uint32)

 proc re*(s: string, flags = {reStudy}): Regex =
  ## Constructor of regular expressions.
@@ -116,16 +131,9 @@ proc re*(s: string, flags = {reStudy}): Regex =
    result = Regex()
  else:
    new(result, finalizeRegEx)
-  result.h = rawCompile(s, cast[cint](flags - {reStudy}))
+  result.h = rawCompile(s, toPcre2Options(flags))
  if reStudy in flags:
-    var msg: cstring = ""
-    var options: cint = 0
-    var hasJit: cint = 0
-    if pcre.config(pcre.CONFIG_JIT, addr hasJit) == 0:
-      if hasJit == 1'i32:
-        options = pcre.STUDY_JIT_COMPILE
-    result.e = pcre.study(result.h, options, addr msg)
-    if not isNil(msg): raiseInvalidRegex($msg)
+    jitCompile(result.h)

 proc rex*(s: string, flags = {reStudy, reExtended}): Regex =
  ## Constructor for extended regular expressions.
@@ -142,25 +150,58 @@ proc bufSubstr(b: cstring, sPos, ePos: int): string {.inline.} =
  copyMem(addr(result[0]), unsafeAddr(b[sPos]), sz)
  result.setLen(sz)

-proc matchOrFind(buf: cstring, pattern: Regex, matches: var openArray[string],
-                 start, bufSize, flags: cint): cint =
-  var
-    rtarray = initRtArray[cint]((matches.len+1)*3)
-    rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize, start, flags,
-      cast[ptr cint](rawMatches), (matches.len+1).cint*3)
-  if res < 0'i32: return res
-  for i in 1..int(res)-1:
-    var a = rawMatches[i * 2]
-    var b = rawMatches[i * 2 + 1]
-    if a >= 0'i32:
-      matches[i-1] = bufSubstr(buf, int(a), int(b))
-    else: matches[i-1] = ""
-  return rawMatches[1] - rawMatches[0]
+proc newMatchData(slots: int): ptr pcre2.MatchData =
+  result = pcre2.match_data_create(max(slots, 1).uint32, nil)
+  if result == nil:
+    raiseInvalidRegex("could not allocate PCRE2 match data")

-const MaxReBufSize* = high(cint)
-  ## Maximum PCRE (API 1) buffer start/size equal to `high(cint)`, which even
-  ## for 64-bit systems can be either 2`31`:sup:-1 or 2`63`:sup:-1.
+template ovector(matchData: ptr pcre2.MatchData): ptr UncheckedArray[csize_t] =
+  cast[ptr UncheckedArray[csize_t]](pcre2.get_ovector_pointer(matchData))
+
+proc rawMatch(buf: cstring, pattern: Regex, start, bufSize: int,
+              options: uint32, matchData: ptr pcre2.MatchData): cint =
+  if start < 0 or bufSize < 0:
+    return pcre2.ERROR_BADOFFSET
+  pcre2.match(pattern.h, cast[ptr uint8](buf), bufSize.csize_t,
+              start.csize_t, options, matchData, nil)
+
+proc copyStringMatches(buf: cstring, rawMatches: ptr UncheckedArray[csize_t],
+                       captureCount: int, matches: var openArray[string]) =
+  ## Copies groups 1.. of `rawMatches` into `matches` ("" when unset).
+  ## `captureCount` is the non-negative `pcre2.match` result; 0 means the
+  ## ovector (sized `matches.len + 1` pairs by the callers) was filled fully.
+  let upper =
+    if captureCount == 0: matches.len else: min(captureCount - 1, matches.len)
+  for i in 1 .. upper:
+    let (startOff, endOff) = (rawMatches[i * 2], rawMatches[i * 2 + 1])
+    matches[i-1] =
+      if startOff == pcre2.UNSET: "" else: bufSubstr(buf, int(startOff), int(endOff))
+
+proc copyBoundsMatches(rawMatches: ptr UncheckedArray[csize_t],
+                       captureCount: int,
+                       matches: var openArray[tuple[first, last: int]]) =
+  ## Stores inclusive bounds of groups 1.. in `matches` (`(-1, 0)` when unset).
+  ## `captureCount` is the non-negative `pcre2.match` result; 0 means the
+  ## ovector (sized `matches.len + 1` pairs by the callers) was filled fully.
+  let upper =
+    if captureCount == 0: matches.len else: min(captureCount - 1, matches.len)
+  for i in 1 .. upper:
+    let (startOff, endOff) = (rawMatches[i * 2], rawMatches[i * 2 + 1])
+    matches[i-1] =
+      if startOff == pcre2.UNSET: (-1, 0) else: (int(startOff), int(endOff) - 1)
+
+proc matchOrFind(buf: cstring, pattern: Regex, matches: var openArray[string],
+                 start, bufSize: int, options: uint32): int =
+  let matchData = newMatchData(matches.len + 1)
+  defer: pcre2.match_data_free(matchData)
+  let res = rawMatch(buf, pattern, start, bufSize, options, matchData)
+  let rawMatches = ovector(matchData)
+  if res < 0: return int(res)
+  copyStringMatches(buf, rawMatches, int(res), matches)
+  return int(rawMatches[1]) - int(rawMatches[0])
+
+const MaxReBufSize* = high(int)
+  ## Maximum PCRE2 buffer start/size accepted by this Nim API.

 proc findBounds*(buf: cstring, pattern: Regex, matches: var openArray[string],
                 start = 0, bufSize: int): tuple[first, last: int] =
@@ -172,17 +213,12 @@ proc findBounds*(buf: cstring, pattern: Regex, matches: var openArray[string],
  ##
  ## Note: The memory for `matches` needs to be allocated before this function is
  ## called, otherwise it will just remain empty.
-  var
-    rtarray = initRtArray[cint]((matches.len+1)*3)
-    rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
-      cast[ptr cint](rawMatches), (matches.len+1).cint*3)
-  if res < 0'i32: return (-1, 0)
-  for i in 1..int(res)-1:
-    var a = rawMatches[i * 2]
-    var b = rawMatches[i * 2 + 1]
-    if a >= 0'i32: matches[i-1] = bufSubstr(buf, int(a), int(b))
-    else: matches[i-1] = ""
+  let matchData = newMatchData(matches.len + 1)
+  defer: pcre2.match_data_free(matchData)
+  let res = rawMatch(buf, pattern, start, bufSize, 0'u32, matchData)
+  let rawMatches = ovector(matchData)
+  if res < 0: return (-1, 0)
+  copyStringMatches(buf, rawMatches, int(res), matches)
  return (rawMatches[0].int, rawMatches[1].int - 1)

 proc findBounds*(s: string, pattern: Regex, matches: var openArray[string],
@@ -212,17 +248,12 @@ proc findBounds*(buf: cstring, pattern: Regex,
  ## `(-1,0)` is returned.
  ##
  ## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
-  var
-    rtarray = initRtArray[cint]((matches.len+1)*3)
-    rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
-      cast[ptr cint](rawMatches), (matches.len+1).cint*3)
-  if res < 0'i32: return (-1, 0)
-  for i in 1..int(res)-1:
-    var a = rawMatches[i * 2]
-    var b = rawMatches[i * 2 + 1]
-    if a >= 0'i32: matches[i-1] = (int(a), int(b)-1)
-    else: matches[i-1] = (-1,0)
+  let matchData = newMatchData(matches.len + 1)
+  defer: pcre2.match_data_free(matchData)
+  let res = rawMatch(buf, pattern, start, bufSize, 0'u32, matchData)
+  let rawMatches = ovector(matchData)
+  if res < 0: return (-1, 0)
+  copyBoundsMatches(rawMatches, int(res), matches)
  return (rawMatches[0].int, rawMatches[1].int - 1)

 proc findBounds*(s: string, pattern: Regex,
@@ -244,29 +275,28 @@ proc findBounds*(s: string, pattern: Regex,
      min(start, MaxReBufSize), min(s.len, MaxReBufSize))

 proc findBoundsImpl(buf: cstring, pattern: Regex,
-                    start = 0, bufSize = 0, flags = 0): tuple[first, last: int] =
-  var rtarray = initRtArray[cint](3)
-  let rawMatches = rtarray.getRawData
-  let res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, flags.int32,
-                cast[ptr cint](rawMatches), 3)
-
-  if res < 0'i32:
+                    start = 0, bufSize = 0,
+                    options = 0'u32): tuple[first, last: int] =
+  let matchData = newMatchData(1)
+  defer: pcre2.match_data_free(matchData)
+  let res = rawMatch(buf, pattern, start, bufSize, options, matchData)
+  let rawMatches = ovector(matchData)
+  if res < 0:
    result = (-1, 0)
  else:
-    result = (int(rawMatches[0]), int(rawMatches[1]-1))
+    result = (int(rawMatches[0]), int(rawMatches[1]) - 1)

 proc findBounds*(buf: cstring, pattern: Regex,
                 start = 0, bufSize: int): tuple[first, last: int] =
  ## returns the `first` and `last` position of `pattern` in `buf`,
  ## where `buf` has length `bufSize` (not necessarily `'\0'` terminated).
  ## If it does not match, `(-1,0)` is returned.
-  var
-    rtarray = initRtArray[cint](3)
-    rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
-      cast[ptr cint](rawMatches), 3)
-  if res < 0'i32: return (int(res), 0)
-  return (int(rawMatches[0]), int(rawMatches[1]-1))
+  let matchData = newMatchData(1)
+  defer: pcre2.match_data_free(matchData)
+  let res = rawMatch(buf, pattern, start, bufSize, 0'u32, matchData)
+  let rawMatches = ovector(matchData)
+  if res < 0: return (int(res), 0)
+  return (int(rawMatches[0]), int(rawMatches[1]) - 1)

 proc findBounds*(s: string, pattern: Regex,
                 start = 0): tuple[first, last: int] {.inline.} =
@@ -279,14 +309,16 @@ proc findBounds*(s: string, pattern: Regex,
  result = findBounds(cstring(s), pattern,
      min(start, MaxReBufSize), min(s.len, MaxReBufSize))

-proc matchOrFind(buf: cstring, pattern: Regex, start, bufSize: int, flags: cint): cint =
-  var
-    rtarray = initRtArray[cint](3)
-    rawMatches = rtarray.getRawData
-  result = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, flags,
-                    cast[ptr cint](rawMatches), 3)
-  if result >= 0'i32:
-    result = rawMatches[1] - rawMatches[0]
+proc matchOrFind(buf: cstring, pattern: Regex, start, bufSize: int,
+                 options: uint32): int =
+  let matchData = newMatchData(1)
+  defer: pcre2.match_data_free(matchData)
+  let res = rawMatch(buf, pattern, start, bufSize, options, matchData)
+  if res >= 0:
+    let rawMatches = ovector(matchData)
+    result = int(rawMatches[1]) - int(rawMatches[0])
+  else:
+    result = int(res)

 proc matchLen*(s: string, pattern: Regex, matches: var openArray[string],
              start = 0): int {.inline.} =
@@ -295,7 +327,7 @@ proc matchLen*(s: string, pattern: Regex, matches: var openArray[string],
  ## of zero can happen.
  ##
  ## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
-  result = matchOrFind(cstring(s), pattern, matches, start.cint, s.len.cint, pcre.ANCHORED)
+  result = matchOrFind(cstring(s), pattern, matches, start, s.len, cast[uint32](pcre2.ANCHORED))

 proc matchLen*(buf: cstring, pattern: Regex, matches: var openArray[string],
              start = 0, bufSize: int): int {.inline.} =
@@ -304,7 +336,7 @@ proc matchLen*(buf: cstring, pattern: Regex, matches: var openArray[string],
  ## of zero can happen.
  ##
  ## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
-  return matchOrFind(buf, pattern, matches, start.cint, bufSize.cint, pcre.ANCHORED)
+  return matchOrFind(buf, pattern, matches, start, bufSize, cast[uint32](pcre2.ANCHORED))

 proc matchLen*(s: string, pattern: Regex, start = 0): int {.inline.} =
  ## the same as `match`, but it returns the length of the match,
@@ -315,13 +347,13 @@ proc matchLen*(s: string, pattern: Regex, start = 0): int {.inline.} =
    doAssert matchLen("abcdefg", re"cde", 2) == 3
    doAssert matchLen("abcdefg", re"abcde") == 5
    doAssert matchLen("abcdefg", re"cde") == -1
-  result = matchOrFind(cstring(s), pattern, start.cint, s.len.cint, pcre.ANCHORED)
+  result = matchOrFind(cstring(s), pattern, start, s.len, cast[uint32](pcre2.ANCHORED))

 proc matchLen*(buf: cstring, pattern: Regex, start = 0, bufSize: int): int {.inline.} =
  ## the same as `match`, but it returns the length of the match,
  ## if there is no match, `-1` is returned. Note that a match length
  ## of zero can happen.
-  result = matchOrFind(buf, pattern, start.cint, bufSize, pcre.ANCHORED)
+  result = matchOrFind(buf, pattern, start, bufSize, cast[uint32](pcre2.ANCHORED))

 proc match*(s: string, pattern: Regex, start = 0): bool {.inline.} =
  ## returns `true` if `s[start..]` matches the `pattern`.
@@ -361,18 +393,13 @@ proc find*(buf: cstring, pattern: Regex, matches: var openArray[string],
  ## `buf` has length `bufSize` (not necessarily `'\0'` terminated).
  ##
  ## .. note:: The memory for `matches` needs to be allocated before this function is called, otherwise it will just remain empty.
-  var
-    rtarray = initRtArray[cint]((matches.len+1)*3)
-    rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
-      cast[ptr cint](rawMatches), (matches.len+1).cint*3)
-  if res < 0'i32: return res
-  for i in 1..int(res)-1:
-    var a = rawMatches[i * 2]
-    var b = rawMatches[i * 2 + 1]
-    if a >= 0'i32: matches[i-1] = bufSubstr(buf, int(a), int(b))
-    else: matches[i-1] = ""
-  return rawMatches[0]
+  let matchData = newMatchData(matches.len + 1)
+  defer: pcre2.match_data_free(matchData)
+  let res = rawMatch(buf, pattern, start, bufSize, 0'u32, matchData)
+  let rawMatches = ovector(matchData)
+  if res < 0: return int(res)
+  copyStringMatches(buf, rawMatches, int(res), matches)
+  return int(rawMatches[0])

 proc find*(s: string, pattern: Regex, matches: var openArray[string],
           start = 0): int {.inline.} =
@@ -387,13 +414,12 @@ proc find*(buf: cstring, pattern: Regex, start = 0, bufSize: int): int =
  ## returns the starting position of `pattern` in `buf`,
  ## where `buf` has length `bufSize` (not necessarily `'\0'` terminated).
  ## If it does not match, `-1` is returned.
-  var
-    rtarray = initRtArray[cint](3)
-    rawMatches = rtarray.getRawData
-    res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, start.cint, 0'i32,
-      cast[ptr cint](rawMatches), 3)
-  if res < 0'i32: return res
-  return rawMatches[0]
+  let matchData = newMatchData(1)
+  defer: pcre2.match_data_free(matchData)
+  let res = rawMatch(buf, pattern, start, bufSize, 0'u32, matchData)
+  let rawMatches = ovector(matchData)
+  if res < 0: return int(res)
+  return int(rawMatches[0])

 proc find*(s: string, pattern: Regex, start = 0): int {.inline.} =
  ## returns the starting position of `pattern` in `s`. If it does not
@@ -413,40 +439,38 @@ iterator findAll*(s: string, pattern: Regex, start = 0): string =
  ##
  ## Note that since this is an iterator you should not modify the string you
  ## are iterating over: bad things could happen.
-  var
-    i = int32(start)
-    rtarray = initRtArray[cint](3)
-    rawMatches = rtarray.getRawData
+  var i = start
+  let matchData = newMatchData(1)
+  defer: pcre2.match_data_free(matchData)
  while true:
-    let res = pcre.exec(pattern.h, pattern.e, s, len(s).cint, i, 0'i32,
-      cast[ptr cint](rawMatches), 3)
-    if res < 0'i32: break
-    let a = rawMatches[0]
-    let b = rawMatches[1]
-    if a == b and a == i: break
-    yield substr(s, int(a), int(b)-1)
-    i = b
+    let res = rawMatch(s.cstring, pattern, i, len(s), 0'u32, matchData)
+    if res < 0: break
+    let rawMatches = ovector(matchData)
+    let matchStart = rawMatches[0]
+    let matchEnd = rawMatches[1]
+    if matchStart == matchEnd and matchStart.int == i: break
+    yield substr(s, int(matchStart), int(matchEnd) - 1)
+    i = matchEnd.int

 iterator findAll*(buf: cstring, pattern: Regex, start = 0, bufSize: int): string =
  ## Yields all matching `substrings` of `s` that match `pattern`.
  ##
  ## Note that since this is an iterator you should not modify the string you
  ## are iterating over: bad things could happen.
-  var
-    i = int32(start)
-    rtarray = initRtArray[cint](3)
-    rawMatches = rtarray.getRawData
+  var i = start
+  let matchData = newMatchData(1)
+  defer: pcre2.match_data_free(matchData)
  while true:
-    let res = pcre.exec(pattern.h, pattern.e, buf, bufSize.cint, i, 0'i32,
-      cast[ptr cint](rawMatches), 3)
-    if res < 0'i32: break
-    let a = rawMatches[0]
-    let b = rawMatches[1]
-    if a == b and a == i: break
-    var str = newString(b-a)
-    copyMem(str[0].addr, unsafeAddr(buf[a]), b-a)
+    let res = rawMatch(buf, pattern, i, bufSize, 0'u32, matchData)
+    if res < 0: break
+    let rawMatches = ovector(matchData)
+    let matchStart = rawMatches[0]
+    let matchEnd = rawMatches[1]
+    if matchStart == matchEnd and matchStart.int == i: break
+    var str = newString(int(matchEnd - matchStart)) # "" for a zero-width match
+    if str.len > 0: copyMem(addr str[0], unsafeAddr(buf[int(matchStart)]), str.len)
    yield str
-    i = b
+    i = matchEnd.int

 proc findAll*(s: string, pattern: Regex, start = 0): seq[string] {.inline.} =
  ## returns all matching `substrings` of `s` that match `pattern`.
@@ -503,7 +527,7 @@ proc replace*(s: string, sub: Regex, by = ""): string =
    doAssert "var1=key; var2=key2".replace(re"(\w+)=(\w+)", "?") == "?; ?"
  result = ""
  var prev = 0
-  var flags = int32(0)
+  var flags = 0'u32
  while prev < s.len:
    var match = findBoundsImpl(s.cstring, sub, prev, s.len, flags)
    flags = 0
@@ -512,7 +536,7 @@ proc replace*(s: string, sub: Regex, by = ""): string =
    add(result, by)
    if match.first > match.last:
      # 0-len match
-      flags = pcre.NOTEMPTY_ATSTART
+      flags = pcre2.NOTEMPTY_ATSTART.uint32
    prev = match.last + 1
  add(result, substr(s, prev))

--- a/lib/wrappers/pcre2.nim
+++ b/lib/wrappers/pcre2.nim
@@ -0,0 +1,260 @@
+#
+#            Nim's Runtime Library
+#        (c) Copyright 2026 Nim Contributors
+#
+#    See the file "copying.txt", included in this
+#    distribution, for details about the copyright.
+#
+
+## Wrapper for the 8-bit PCRE2 API.
+
+when sizeof(int) == 4:
+  const ANCHORED* = low(int)
+else:
+  const ANCHORED* = int(0x80000000)
+
+const
+  NO_UTF_CHECK* = int(0x40000000)
+  ENDANCHORED* = int(0x20000000)
+
+const
+  ALLOW_EMPTY_CLASS* = 0x00000001
+  ALT_BSUX* = 0x00000002
+  AUTO_CALLOUT* = 0x00000004
+  CASELESS* = 0x00000008
+  DOLLAR_ENDONLY* = 0x00000010
+  DOTALL* = 0x00000020
+  DUPNAMES* = 0x00000040
+  EXTENDED* = 0x00000080
+  FIRSTLINE* = 0x00000100
+  MATCH_UNSET_BACKREF* = 0x00000200
+  MULTILINE* = 0x00000400
+  NEVER_UCP* = 0x00000800
+  NEVER_UTF* = 0x00001000
+  NO_AUTO_CAPTURE* = 0x00002000
+  NO_AUTO_POSSESS* = 0x00004000
+  NO_DOTSTAR_ANCHOR* = 0x00008000
+  NO_START_OPTIMIZE* = 0x00010000
+  NO_START_OPTIMISE* = NO_START_OPTIMIZE
+  UCP* = 0x00020000
+  UNGREEDY* = 0x00040000
+  UTF* = 0x00080000
+  UTF8* = UTF
+  NEVER_BACKSLASH_C* = 0x00100000
+  ALT_CIRCUMFLEX* = 0x00200000
+  ALT_VERBNAMES* = 0x00400000
+  USE_OFFSET_LIMIT* = 0x00800000
+  EXTENDED_MORE* = 0x01000000
+  LITERAL* = 0x02000000
+  MATCH_INVALID_UTF* = 0x04000000
+  ALT_EXTENDED_CLASS* = 0x08000000
+
+  ## PCRE2 no longer exposes PCRE's `JAVASCRIPT_COMPAT` option. `ALT_BSUX`
+  ## preserves the most important JavaScript-style escape handling.
+  JAVASCRIPT_COMPAT* = ALT_BSUX
+
+const
+  JIT_COMPLETE* = 0x00000001
+  JIT_PARTIAL_SOFT* = 0x00000002
+  JIT_PARTIAL_HARD* = 0x00000004
+  JIT_INVALID_UTF* = 0x00000100
+  JIT_TEST_ALLOC* = 0x00000200
+
+const
+  NOTBOL* = 0x00000001
+  NOTEOL* = 0x00000002
+  NOTEMPTY* = 0x00000004
+  NOTEMPTY_ATSTART* = 0x00000008
+  PARTIAL_SOFT* = 0x00000010
+  PARTIAL_HARD* = 0x00000020
+  DFA_RESTART* = 0x00000040
+  DFA_SHORTEST* = 0x00000080
+  NO_JIT* = 0x00002000
+  COPY_MATCHED_SUBJECT* = 0x00004000
+  DISABLE_RECURSELOOP_CHECK* = 0x00040000
+
+const
+  NEWLINE_CR* = 1
+  NEWLINE_LF* = 2
+  NEWLINE_CRLF* = 3
+  NEWLINE_ANY* = 4
+  NEWLINE_ANYCRLF* = 5
+  NEWLINE_NUL* = 6
+  BSR_UNICODE* = 1
+  BSR_ANYCRLF* = 2
+
+const
+  ERROR_NOMATCH* = -1
+  ERROR_PARTIAL* = -2
+
+  ERROR_UTF8_ERR1* = -3
+  ERROR_UTF8_ERR2* = -4
+  ERROR_UTF8_ERR3* = -5
+  ERROR_UTF8_ERR4* = -6
+  ERROR_UTF8_ERR5* = -7
+  ERROR_UTF8_ERR6* = -8
+  ERROR_UTF8_ERR7* = -9
+  ERROR_UTF8_ERR8* = -10
+  ERROR_UTF8_ERR9* = -11
+  ERROR_UTF8_ERR10* = -12
+  ERROR_UTF8_ERR11* = -13
+  ERROR_UTF8_ERR12* = -14
+  ERROR_UTF8_ERR13* = -15
+  ERROR_UTF8_ERR14* = -16
+  ERROR_UTF8_ERR15* = -17
+  ERROR_UTF8_ERR16* = -18
+  ERROR_UTF8_ERR17* = -19
+  ERROR_UTF8_ERR18* = -20
+  ERROR_UTF8_ERR19* = -21
+  ERROR_UTF8_ERR20* = -22
+  ERROR_UTF8_ERR21* = -23
+
+  ERROR_BADDATA* = -29
+  ERROR_MIXEDTABLES* = -30
+  ERROR_BADMAGIC* = -31
+  ERROR_BADMODE* = -32
+  ERROR_BADOFFSET* = -33
+  ERROR_BADOPTION* = -34
+  ERROR_BADREPLACEMENT* = -35
+  ERROR_BADUTFOFFSET* = -36
+  ERROR_CALLOUT* = -37
+  ERROR_INTERNAL* = -44
+  ERROR_JIT_BADOPTION* = -45
+  ERROR_JIT_STACKLIMIT* = -46
+  ERROR_MATCHLIMIT* = -47
+  ERROR_NOMEMORY* = -48
+  ERROR_NOSUBSTRING* = -49
+  ERROR_NULL* = -51
+  ERROR_RECURSELOOP* = -52
+  ERROR_DEPTHLIMIT* = -53
+  ERROR_RECURSIONLIMIT* = ERROR_DEPTHLIMIT
+  ERROR_UNAVAILABLE* = -54
+  ERROR_UNSET* = -55
+  ERROR_BADOFFSETLIMIT* = -56
+  ERROR_HEAPLIMIT* = -63
+  ERROR_DFA_UINVALID_UTF* = -66
+  ERROR_INVALIDOFFSET* = -67
+  ERROR_JIT_UNSUPPORTED* = -68
+
+const
+  INFO_ALLOPTIONS* = 0
+  INFO_ARGOPTIONS* = 1
+  INFO_BACKREFMAX* = 2
+  INFO_BSR* = 3
+  INFO_CAPTURECOUNT* = 4
+  INFO_FIRSTCODEUNIT* = 5
+  INFO_FIRSTCODETYPE* = 6
+  INFO_FIRSTBITMAP* = 7
+  INFO_HASCRORLF* = 8
+  INFO_JCHANGED* = 9
+  INFO_JITSIZE* = 10
+  INFO_LASTCODEUNIT* = 11
+  INFO_LASTCODETYPE* = 12
+  INFO_MATCHEMPTY* = 13
+  INFO_MATCHLIMIT* = 14
+  INFO_MAXLOOKBEHIND* = 15
+  INFO_MINLENGTH* = 16
+  INFO_NAMECOUNT* = 17
+  INFO_NAMEENTRYSIZE* = 18
+  INFO_NAMETABLE* = 19
+  INFO_NEWLINE* = 20
+  INFO_DEPTHLIMIT* = 21
+  INFO_RECURSIONLIMIT* = INFO_DEPTHLIMIT
+  INFO_SIZE* = 22
+  INFO_HASBACKSLASHC* = 23
+  INFO_FRAMESIZE* = 24
+  INFO_HEAPLIMIT* = 25
+  INFO_EXTRAOPTIONS* = 26
+
+const
+  CONFIG_BSR* = 0
+  CONFIG_JIT* = 1
+  CONFIG_JITTARGET* = 2
+  CONFIG_LINKSIZE* = 3
+  CONFIG_MATCHLIMIT* = 4
+  CONFIG_NEWLINE* = 5
+  CONFIG_PARENSLIMIT* = 6
+  CONFIG_DEPTHLIMIT* = 7
+  CONFIG_RECURSIONLIMIT* = CONFIG_DEPTHLIMIT
+  CONFIG_STACKRECURSE* = 8
+  CONFIG_UNICODE* = 9
+  CONFIG_UNICODE_VERSION* = 10
+  CONFIG_VERSION* = 11
+  CONFIG_HEAPLIMIT* = 12
+  CONFIG_NEVER_BACKSLASH_C* = 13
+  CONFIG_COMPILED_WIDTHS* = 14
+  CONFIG_TABLES_LENGTH* = 15
+
+const
+  ZERO_TERMINATED* = not 0.csize_t
+  UNSET* = not 0.csize_t
+
+type
+  Pcre* = object
+  CompileContext* = object
+  GeneralContext* = object
+  MatchContext* = object
+  MatchData* = object
+  JitStack* = object
+
+when not defined(usePcreHeader):
+  when hostOS == "windows":
+    const pcre2Dll = "pcre2-8.dll"
+  elif hostOS == "macosx":
+    const pcre2Dll = "libpcre2-8(.0|).dylib"
+  else:
+    const pcre2Dll = "libpcre2-8.so(.0|)"
+  {.push dynlib: pcre2Dll.}
+else:
+  {.passC: "-DPCRE2_CODE_UNIT_WIDTH=8".}
+  {.push header: "<pcre2.h>".}
+
+{.push cdecl, importc: "pcre2_$1_8".}
+
+proc compile*(pattern: ptr uint8,
+              length: csize_t,
+              options: uint32,
+              errorCode: ptr cint,
+              errorOffset: ptr csize_t,
+              context: ptr CompileContext): ptr Pcre
+
+proc code_free*(code: ptr Pcre)
+
+proc config*(what: uint32,
+             where: pointer): cint
+
+proc get_error_message*(errorCode: cint,
+                        buffer: ptr uint8,
+                        bufferLength: csize_t): cint
+
+proc match*(code: ptr Pcre,
+            subject: ptr uint8,
+            length: csize_t,
+            startOffset: csize_t,
+            options: uint32,
+            matchData: ptr MatchData,
+            context: ptr MatchContext): cint
+
+proc match_data_create*(oveccount: uint32,
+                        context: ptr GeneralContext): ptr MatchData
+
+proc match_data_create_from_pattern*(code: ptr Pcre,
+                                     context: ptr GeneralContext): ptr MatchData
+
+proc match_data_free*(matchData: ptr MatchData)
+
+proc get_ovector_pointer*(matchData: ptr MatchData): ptr csize_t
+
+proc get_ovector_count*(matchData: ptr MatchData): uint32
+
+proc pattern_info*(code: ptr Pcre,
+                   what: uint32,
+                   where: pointer): cint
+
+proc jit_compile*(code: ptr Pcre,
+                  options: uint32): cint
+
+proc jit_free_unused_memory*(context: ptr GeneralContext = nil) # C API takes a general context
+
+{.pop.}
+{.pop.}
--- a/tools/ci_generate.nim
+++ b/tools/ci_generate.nim
@@ -110,7 +110,7 @@ image: freebsd/latest
 packages:
 - databases/sqlite3
 - devel/boehm-gc-threaded
- devel/pcre
+- devel/pcre2
 - devel/sdl20
 - devel/sfml
 - www/node
@@ -124,7 +124,7 @@ packages:
 - sqlite3
 - node
 - boehm-gc
- pcre
+- pcre2
 - sfml
 - sdl2
 - libffi
--- a/tools/kochdocs.nim
+++ b/tools/kochdocs.nim
@@ -126,6 +126,7 @@ mm.md
  withoutIndex = """
 lib/wrappers/tinyc.nim
 lib/wrappers/pcre.nim
+lib/wrappers/pcre2.nim
 lib/wrappers/openssl.nim
 lib/posix/posix.nim
 lib/posix/linux.nim
--- a/tools/nimgrep.nim
+++ b/tools/nimgrep.nim
@@ -729,7 +729,7 @@ iterator searchFile(pattern: Pattern; buffer: string): Output =
    i = t.last+1
  when typeof(pattern) is Regex:
    if buffer.len > MaxReBufSize:
-      yield Output(kind: openError, msg: "PCRE size limit is " & $MaxReBufSize)
+      yield Output(kind: openError, msg: "PCRE2 size limit is " & $MaxReBufSize)

 func detectBin(buffer: string): bool =
  for i in 0 ..< min(1024, buffer.len):