progress: import pcre2 via std/pcre2; restore JIT compilation in nre with a (*NO_STUDY) opt-out

2026-01-04 04:02:41 +00:00 · 2024-11-06 23:09:25 +08:00
parent 0e3ac70615
commit 07de39cde6
3 changed files with 21 additions and 32 deletions
--- a/lib/impure/nre.nim
+++ b/lib/impure/nre.nim
@@ -61,7 +61,7 @@ runnableExamples:
  assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
  assert find("uxabc", re"ab", start = 3).isNone

-from ../wrappers/pcre2 import nil
+from std/pcre2 import nil
 import nre/private/util
 import std/tables
 from std/strutils import `%`
@@ -136,8 +136,6 @@ type
    ##     are recognized only in UTF-8 mode.
    ##     —  man pcre
    ##
-    ## -  `(*JAVASCRIPT_COMPAT)` - JavaScript compatibility
-    ## -  `(*NO_STUDY)` - turn off studying; study is enabled by default
    ##
    ## For more details on the leading option groups, see the `Option
    ## Setting <https://man7.org/linux/man-pages/man3/pcresyntax.3.html#OPTION_SETTING>`_
@@ -261,7 +259,7 @@ proc getNameToNumberTable(pattern: Regex): Table[string, int] =

    result[name] = num

-proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex =
+proc initRegex(pattern: string, flags: csize_t, options: uint32, noJit: bool): Regex =
  when defined(gcDestructors):
    result = Regex()
  else:
@@ -279,15 +277,11 @@ proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex =
    # failed to compile
    raise SyntaxError(msg: $errorCode, pos: int errOffset, pattern: pattern)

-  # if study:
-  #   var options: cint = 0
-  #   var hasJit: cint
-  #   if pcre2.config(pcre.CONFIG_JIT, addr hasJit) == 0:
-  #     if hasJit == 1'i32:
-  #       options = pcre2.STUDY_JIT_COMPILE
-  #   result.pcreExtra = pcre.study(result.pcreObj, options, addr errorMsg)
-  #   if errorMsg != nil:
-  #     raise StudyError(msg: $errorMsg)
+  if not noJit:
+    var hasJit: cint = cint(0)
+    if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
+      if hasJit == 1'i32 and pcre2.jit_compile(result.pcreObj, pcre2.JIT_COMPLETE) != 0:
+        raise StudyError(msg: "JIT compilation failed.")

  result.captureNameToId = result.getNameToNumberTable()

@@ -438,9 +432,9 @@ const PcreOptions = {
  "DOLLAR_ENDONLY": pcre2.DOLLAR_ENDONLY,
  "FIRSTLINE": pcre2.FIRSTLINE,
  "NO_AUTO_CAPTURE": pcre2.NO_AUTO_CAPTURE,
-  # "JAVASCRIPT_COMPAT": pcre2.JAVASCRIPT_COMPAT,
  "U": pcre2.UTF or pcre2.UCP # TODO: UTF-8 ?
 }.toTable
+# TODO: maybe add a JIT entry to PcreOptions? (JIT is currently on unless `(*NO_STUDY)` is given)

 # Options that are supported inside regular expressions themselves
 const SkipOptions = [
@@ -449,8 +443,8 @@ const SkipOptions = [
  "CR", "LF", "CRLF", "ANYCRLF", "ANY", "BSR_ANYCRLF", "BSR_UNICODE"
 ]

-proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
-  result = ("", 0'u32)
+proc extractOptions(pattern: string): tuple[pattern: string, options: uint32, noJit: bool] =
+  result = ("", 0'u32, false)

  var optionStart = 0
  var equals = false
@@ -470,8 +464,8 @@ proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
        result.pattern.add pattern[optionStart .. i]
      elif PcreOptions.hasKey name:
        result.options = result.options or PcreOptions[name]
-      # elif name == "NO_STUDY":
-      #   result.study = false
+      elif name == "NO_STUDY":
+        result.noJit = true
      else:
        break
      optionStart = i+1
@@ -488,8 +482,8 @@ proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
  result.pattern.add pattern[optionStart .. pattern.high]

 proc re*(pattern: string): Regex =
-  let (pattern, options) = extractOptions(pattern)
-  initRegex(pattern, pcre2.ZERO_TERMINATED, options)
+  let (pattern, options, noJit) = extractOptions(pattern)
+  initRegex(pattern, pcre2.ZERO_TERMINATED, options, noJit)

 proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32): Option[RegexMatch] =
  var myResult = RegexMatch(pattern: pattern, str: str)
@@ -517,12 +511,7 @@ proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32)
  let ovector = cast[ptr UncheckedArray[csize_t]](pcre2.get_ovector_pointer(matchData))
  let capture_count = pcre2.get_ovector_count(matchData)
  let ovector_size = 2 * capture_count.int * sizeof(csize_t)
-  # echo (myResult.pcreMatchBounds.len * 2 * sizeof(csize_t), ovector_size)
-  # echo (capture_count, ovector[0], ovector[1])
  copyMem(addr myResult.pcreMatchBounds[0], ovector, ovector_size)
-  # echo (myResult.pcreMatchBounds[0].a, myResult.pcreMatchBounds[0].b)
-
-  # echo " -> ", myResult
  if execRet >= 0:
    return some(myResult)

--- a/lib/impure/re.nim
+++ b/lib/impure/re.nim
@@ -38,7 +38,7 @@ runnableExamples:
 import
  std/[strutils, rtarrays]

-import ../wrappers/pcre2
+import std/pcre2

 when defined(nimPreviewSlimSystem):
  import std/syncio
@@ -115,7 +115,7 @@ proc re*(s: string, flags = {reStudy}): Regex =
    options = options or CASELESS
  result.h = rawCompile(s, cast[csize_t](ZERO_TERMINATED), options)
  if reStudy in flags: # TODO: add reJit
-    var hasJit: cint = 0
+    var hasJit: cint = cint(0)
    if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
      if hasJit == 1'i32 and jit_compile(result.h, pcre2.JIT_COMPLETE) != 0:
        raiseInvalidRegex("JIT compilation failed.")
--- a/tests/stdlib/nre/init.nim
+++ b/tests/stdlib/nre/init.nim
@@ -1,7 +1,7 @@
 import unittest
 include nre

-from ../../../lib/wrappers/pcre2 import nil
+from std/pcre2 import nil

 block: # Test NRE initialization
  block: # correct initialization
@@ -10,23 +10,23 @@ block: # Test NRE initialization

  block: # options
    check(extractOptions("(*NEVER_UTF)") ==
-          ("", pcre2.NEVER_UTF))
+          ("", pcre2.NEVER_UTF, false))
    check(extractOptions("(*UTF8)(*ANCHORED)(*UCP)z") ==
-          ("(*UTF8)(*UCP)z", pcre2.ANCHORED))
+          ("(*UTF8)(*UCP)z", pcre2.ANCHORED, false))
    # check(extractOptions("(*ANCHORED)(*UTF8)(*JAVASCRIPT_COMPAT)z") ==
    #       ("(*UTF8)z", pcre2.ANCHORED or pcre2.JAVASCRIPT_COMPAT, true))

    # check(extractOptions("(*NO_STUDY)(") == ("(", 0'u32))

    check(extractOptions("(*LIMIT_MATCH=6)(*ANCHORED)z") ==
-          ("(*LIMIT_MATCH=6)z", pcre2.ANCHORED))
+          ("(*LIMIT_MATCH=6)z", pcre2.ANCHORED, false))

  block: # incorrect options
    for s in ["CR", "(CR", "(*CR", "(*abc)", "(*abc)CR",
              "(?i)",
              "(*LIMIT_MATCH=5", "(*NO_AUTO_POSSESS=5)"]:
      let ss = s & "(*NEVER_UTF)"
-      check(extractOptions(ss) == (ss, 0'u32))
+      check(extractOptions(ss) == (ss, 0'u32, false))

  block: # invalid regex
    # expect(SyntaxError): discard re("[0-9")