diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim index e5364ba67d..70b8d16fa1 100644 --- a/lib/impure/nre.nim +++ b/lib/impure/nre.nim @@ -61,7 +61,7 @@ runnableExamples: assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab" assert find("uxabc", re"ab", start = 3).isNone -from ../wrappers/pcre2 import nil +from std/pcre2 import nil import nre/private/util import std/tables from std/strutils import `%` @@ -136,8 +136,6 @@ type ## are recognized only in UTF-8 mode. ## — man pcre ## - ## - `(*JAVASCRIPT_COMPAT)` - JavaScript compatibility - ## - `(*NO_STUDY)` - turn off studying; study is enabled by default ## ## For more details on the leading option groups, see the `Option ## Setting `_ @@ -261,7 +259,7 @@ proc getNameToNumberTable(pattern: Regex): Table[string, int] = result[name] = num -proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex = +proc initRegex(pattern: string, flags: csize_t, options: uint32, noJit: bool): Regex = when defined(gcDestructors): result = Regex() else: @@ -279,15 +277,11 @@ proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex = # failed to compile raise SyntaxError(msg: $errorCode, pos: int errOffset, pattern: pattern) - # if study: - # var options: cint = 0 - # var hasJit: cint - # if pcre2.config(pcre.CONFIG_JIT, addr hasJit) == 0: - # if hasJit == 1'i32: - # options = pcre2.STUDY_JIT_COMPILE - # result.pcreExtra = pcre.study(result.pcreObj, options, addr errorMsg) - # if errorMsg != nil: - # raise StudyError(msg: $errorMsg) + if not noJit: + var hasJit: cint = cint(0) + if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0: + if hasJit == 1'i32 and pcre2.jit_compile(result.pcreObj, pcre2.JIT_COMPLETE) != 0: + raise StudyError(msg: "JIT compilation failed.") result.captureNameToId = result.getNameToNumberTable() @@ -438,9 +432,9 @@ const PcreOptions = { "DOLLAR_ENDONLY": pcre2.DOLLAR_ENDONLY, "FIRSTLINE": pcre2.FIRSTLINE, "NO_AUTO_CAPTURE": pcre2.NO_AUTO_CAPTURE, - # "JAVASCRIPT_COMPAT": pcre2.JAVASCRIPT_COMPAT, "U": pcre2.UTF or pcre2.UCP # TODO: UTF-8 ? }.toTable +# TODO: maybe add JIT? # Options that are supported inside regular expressions themselves const SkipOptions = [ @@ -449,8 +443,8 @@ const SkipOptions = [ "CR", "LF", "CRLF", "ANYCRLF", "ANY", "BSR_ANYCRLF", "BSR_UNICODE" ] -proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] = - result = ("", 0'u32) +proc extractOptions(pattern: string): tuple[pattern: string, options: uint32, noJit: bool] = + result = ("", 0'u32, false) var optionStart = 0 var equals = false @@ -470,8 +464,8 @@ proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] = result.pattern.add pattern[optionStart .. i] elif PcreOptions.hasKey name: result.options = result.options or PcreOptions[name] - # elif name == "NO_STUDY": - # result.study = false + elif name == "NO_STUDY": + result.noJit = true else: break optionStart = i+1 @@ -488,8 +482,8 @@ proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] = result.pattern.add pattern[optionStart .. pattern.high] proc re*(pattern: string): Regex = - let (pattern, options) = extractOptions(pattern) - initRegex(pattern, pcre2.ZERO_TERMINATED, options) + let (pattern, options, noJit) = extractOptions(pattern) + initRegex(pattern, pcre2.ZERO_TERMINATED, options, noJit) proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32): Option[RegexMatch] = var myResult = RegexMatch(pattern: pattern, str: str) @@ -517,12 +511,7 @@ proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32) let ovector = cast[ptr UncheckedArray[csize_t]](pcre2.get_ovector_pointer(matchData)) let capture_count = pcre2.get_ovector_count(matchData) let ovector_size = 2 * capture_count.int * sizeof(csize_t) - # echo (myResult.pcreMatchBounds.len * 2 * sizeof(csize_t), ovector_size) - # echo (capture_count, ovector[0], ovector[1]) copyMem(addr myResult.pcreMatchBounds[0], ovector, ovector_size) - # echo (myResult.pcreMatchBounds[0].a, myResult.pcreMatchBounds[0].b) - - # echo " -> ", myResult if execRet >= 0: return some(myResult) diff --git a/lib/impure/re.nim b/lib/impure/re.nim index deceb9739b..beb26b1ede 100644 --- a/lib/impure/re.nim +++ b/lib/impure/re.nim @@ -38,7 +38,7 @@ runnableExamples: import std/[strutils, rtarrays] -import ../wrappers/pcre2 +import std/pcre2 when defined(nimPreviewSlimSystem): import std/syncio @@ -115,7 +115,7 @@ proc re*(s: string, flags = {reStudy}): Regex = options = options or CASELESS result.h = rawCompile(s, cast[csize_t](ZERO_TERMINATED), options) if reStudy in flags: # TODO: add reJit - var hasJit: cint = 0 + var hasJit: cint = cint(0) if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0: if hasJit == 1'i32 and jit_compile(result.h, pcre2.JIT_COMPLETE) != 0: raiseInvalidRegex("JIT compilation failed.") diff --git a/tests/stdlib/nre/init.nim b/tests/stdlib/nre/init.nim index fd160f542a..57162fe8f2 100644 --- a/tests/stdlib/nre/init.nim +++ b/tests/stdlib/nre/init.nim @@ -1,7 +1,7 @@ import unittest include nre -from ../../../lib/wrappers/pcre2 import nil +from std/pcre2 import nil block: # Test NRE initialization block: # correct initialization @@ -10,23 +10,23 @@ block: # Test NRE initialization block: # options check(extractOptions("(*NEVER_UTF)") == - ("", pcre2.NEVER_UTF)) + ("", pcre2.NEVER_UTF, false)) check(extractOptions("(*UTF8)(*ANCHORED)(*UCP)z") == - ("(*UTF8)(*UCP)z", pcre2.ANCHORED)) + ("(*UTF8)(*UCP)z", pcre2.ANCHORED, false)) # check(extractOptions("(*ANCHORED)(*UTF8)(*JAVASCRIPT_COMPAT)z") == # ("(*UTF8)z", pcre2.ANCHORED or pcre2.JAVASCRIPT_COMPAT, true)) # check(extractOptions("(*NO_STUDY)(") == ("(", 0'u32)) check(extractOptions("(*LIMIT_MATCH=6)(*ANCHORED)z") == - ("(*LIMIT_MATCH=6)z", pcre2.ANCHORED)) + ("(*LIMIT_MATCH=6)z", pcre2.ANCHORED, false)) block: # incorrect options for s in ["CR", "(CR", "(*CR", "(*abc)", "(*abc)CR", "(?i)", "(*LIMIT_MATCH=5", "(*NO_AUTO_POSSESS=5)"]: let ss = s & "(*NEVER_UTF)" - check(extractOptions(ss) == (ss, 0'u32)) + check(extractOptions(ss) == (ss, 0'u32, false)) block: # invalid regex # expect(SyntaxError): discard re("[0-9")