This commit is contained in:
ringabout
2024-11-06 23:09:25 +08:00
parent 0e3ac70615
commit 07de39cde6
3 changed files with 21 additions and 32 deletions

View File

@@ -61,7 +61,7 @@ runnableExamples:
assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
assert find("uxabc", re"ab", start = 3).isNone
from ../wrappers/pcre2 import nil
from std/pcre2 import nil
import nre/private/util
import std/tables
from std/strutils import `%`
@@ -136,8 +136,6 @@ type
## are recognized only in UTF-8 mode.
## — man pcre
##
## - `(*JAVASCRIPT_COMPAT)` - JavaScript compatibility
## - `(*NO_STUDY)` - turn off JIT compilation of the pattern; JIT compilation is attempted by default when the PCRE2 build supports it
##
## For more details on the leading option groups, see the `Option
## Setting <https://man7.org/linux/man-pages/man3/pcresyntax.3.html#OPTION_SETTING>`_
@@ -261,7 +259,7 @@ proc getNameToNumberTable(pattern: Regex): Table[string, int] =
result[name] = num
proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex =
proc initRegex(pattern: string, flags: csize_t, options: uint32, noJit: bool): Regex =
when defined(gcDestructors):
result = Regex()
else:
@@ -279,15 +277,11 @@ proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex =
# failed to compile
raise SyntaxError(msg: $errorCode, pos: int errOffset, pattern: pattern)
# if study:
# var options: cint = 0
# var hasJit: cint
# if pcre2.config(pcre.CONFIG_JIT, addr hasJit) == 0:
# if hasJit == 1'i32:
# options = pcre2.STUDY_JIT_COMPILE
# result.pcreExtra = pcre.study(result.pcreObj, options, addr errorMsg)
# if errorMsg != nil:
# raise StudyError(msg: $errorMsg)
if not noJit:
var hasJit: cint = cint(0)
if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
if hasJit == 1'i32 and pcre2.jit_compile(result.pcreObj, pcre2.JIT_COMPLETE) != 0:
raise StudyError(msg: "JIT compilation failed.")
result.captureNameToId = result.getNameToNumberTable()
@@ -438,9 +432,9 @@ const PcreOptions = {
"DOLLAR_ENDONLY": pcre2.DOLLAR_ENDONLY,
"FIRSTLINE": pcre2.FIRSTLINE,
"NO_AUTO_CAPTURE": pcre2.NO_AUTO_CAPTURE,
# "JAVASCRIPT_COMPAT": pcre2.JAVASCRIPT_COMPAT,
"U": pcre2.UTF or pcre2.UCP # TODO: UTF-8 ?
}.toTable
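# TODO: maybe add an explicit JIT option here? (JIT is currently attempted by default and can only be turned off via `(*NO_STUDY)`)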
# Options that are supported inside regular expressions themselves
const SkipOptions = [
@@ -449,8 +443,8 @@ const SkipOptions = [
"CR", "LF", "CRLF", "ANYCRLF", "ANY", "BSR_ANYCRLF", "BSR_UNICODE"
]
proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
result = ("", 0'u32)
proc extractOptions(pattern: string): tuple[pattern: string, options: uint32, noJit: bool] =
result = ("", 0'u32, false)
var optionStart = 0
var equals = false
@@ -470,8 +464,8 @@ proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
result.pattern.add pattern[optionStart .. i]
elif PcreOptions.hasKey name:
result.options = result.options or PcreOptions[name]
# elif name == "NO_STUDY":
# result.study = false
elif name == "NO_STUDY":
result.noJit = true
else:
break
optionStart = i+1
@@ -488,8 +482,8 @@ proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
result.pattern.add pattern[optionStart .. pattern.high]
proc re*(pattern: string): Regex =
let (pattern, options) = extractOptions(pattern)
initRegex(pattern, pcre2.ZERO_TERMINATED, options)
let (pattern, options, noJit) = extractOptions(pattern)
initRegex(pattern, pcre2.ZERO_TERMINATED, options, noJit)
proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32): Option[RegexMatch] =
var myResult = RegexMatch(pattern: pattern, str: str)
@@ -517,12 +511,7 @@ proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32)
let ovector = cast[ptr UncheckedArray[csize_t]](pcre2.get_ovector_pointer(matchData))
let capture_count = pcre2.get_ovector_count(matchData)
let ovector_size = 2 * capture_count.int * sizeof(csize_t)
# echo (myResult.pcreMatchBounds.len * 2 * sizeof(csize_t), ovector_size)
# echo (capture_count, ovector[0], ovector[1])
copyMem(addr myResult.pcreMatchBounds[0], ovector, ovector_size)
# echo (myResult.pcreMatchBounds[0].a, myResult.pcreMatchBounds[0].b)
# echo " -> ", myResult
if execRet >= 0:
return some(myResult)

View File

@@ -38,7 +38,7 @@ runnableExamples:
import
std/[strutils, rtarrays]
import ../wrappers/pcre2
import std/pcre2
when defined(nimPreviewSlimSystem):
import std/syncio
@@ -115,7 +115,7 @@ proc re*(s: string, flags = {reStudy}): Regex =
options = options or CASELESS
result.h = rawCompile(s, cast[csize_t](ZERO_TERMINATED), options)
if reStudy in flags: # TODO: add reJit
var hasJit: cint = 0
var hasJit: cint = cint(0)
if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
if hasJit == 1'i32 and jit_compile(result.h, pcre2.JIT_COMPLETE) != 0:
raiseInvalidRegex("JIT compilation failed.")

View File

@@ -1,7 +1,7 @@
import unittest
include nre
from ../../../lib/wrappers/pcre2 import nil
from std/pcre2 import nil
block: # Test NRE initialization
block: # correct initialization
@@ -10,23 +10,23 @@ block: # Test NRE initialization
block: # options
check(extractOptions("(*NEVER_UTF)") ==
("", pcre2.NEVER_UTF))
("", pcre2.NEVER_UTF, false))
check(extractOptions("(*UTF8)(*ANCHORED)(*UCP)z") ==
("(*UTF8)(*UCP)z", pcre2.ANCHORED))
("(*UTF8)(*UCP)z", pcre2.ANCHORED, false))
# check(extractOptions("(*ANCHORED)(*UTF8)(*JAVASCRIPT_COMPAT)z") ==
# ("(*UTF8)z", pcre2.ANCHORED or pcre2.JAVASCRIPT_COMPAT, true))
# check(extractOptions("(*NO_STUDY)(") == ("(", 0'u32))
check(extractOptions("(*LIMIT_MATCH=6)(*ANCHORED)z") ==
("(*LIMIT_MATCH=6)z", pcre2.ANCHORED))
("(*LIMIT_MATCH=6)z", pcre2.ANCHORED, false))
block: # incorrect options
for s in ["CR", "(CR", "(*CR", "(*abc)", "(*abc)CR",
"(?i)",
"(*LIMIT_MATCH=5", "(*NO_AUTO_POSSESS=5)"]:
let ss = s & "(*NEVER_UTF)"
check(extractOptions(ss) == (ss, 0'u32))
check(extractOptions(ss) == (ss, 0'u32, false))
block: # invalid regex
# expect(SyntaxError): discard re("[0-9")