mirror of
https://github.com/nim-lang/Nim.git
synced 2026-01-04 04:02:41 +00:00
progress
This commit is contained in:
@@ -61,7 +61,7 @@ runnableExamples:
|
||||
assert find("uxabc", re"(?<=x|y)ab", start = 1).get.captures[-1] == "ab"
|
||||
assert find("uxabc", re"ab", start = 3).isNone
|
||||
|
||||
from ../wrappers/pcre2 import nil
|
||||
from std/pcre2 import nil
|
||||
import nre/private/util
|
||||
import std/tables
|
||||
from std/strutils import `%`
|
||||
@@ -136,8 +136,6 @@ type
|
||||
## are recognized only in UTF-8 mode.
|
||||
## — man pcre
|
||||
##
|
||||
## - `(*JAVASCRIPT_COMPAT)` - JavaScript compatibility
|
||||
## - `(*NO_STUDY)` - turn off studying; study is enabled by default
|
||||
##
|
||||
## For more details on the leading option groups, see the `Option
|
||||
## Setting <https://man7.org/linux/man-pages/man3/pcresyntax.3.html#OPTION_SETTING>`_
|
||||
@@ -261,7 +259,7 @@ proc getNameToNumberTable(pattern: Regex): Table[string, int] =
|
||||
|
||||
result[name] = num
|
||||
|
||||
proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex =
|
||||
proc initRegex(pattern: string, flags: csize_t, options: uint32, noJit: bool): Regex =
|
||||
when defined(gcDestructors):
|
||||
result = Regex()
|
||||
else:
|
||||
@@ -279,15 +277,11 @@ proc initRegex(pattern: string, flags: csize_t, options: uint32): Regex =
|
||||
# failed to compile
|
||||
raise SyntaxError(msg: $errorCode, pos: int errOffset, pattern: pattern)
|
||||
|
||||
# if study:
|
||||
# var options: cint = 0
|
||||
# var hasJit: cint
|
||||
# if pcre2.config(pcre.CONFIG_JIT, addr hasJit) == 0:
|
||||
# if hasJit == 1'i32:
|
||||
# options = pcre2.STUDY_JIT_COMPILE
|
||||
# result.pcreExtra = pcre.study(result.pcreObj, options, addr errorMsg)
|
||||
# if errorMsg != nil:
|
||||
# raise StudyError(msg: $errorMsg)
|
||||
if not noJit:
|
||||
var hasJit: cint = cint(0)
|
||||
if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
|
||||
if hasJit == 1'i32 and pcre2.jit_compile(result.pcreObj, pcre2.JIT_COMPLETE) != 0:
|
||||
raise StudyError(msg: "JIT compilation failed.")
|
||||
|
||||
result.captureNameToId = result.getNameToNumberTable()
|
||||
|
||||
@@ -438,9 +432,9 @@ const PcreOptions = {
|
||||
"DOLLAR_ENDONLY": pcre2.DOLLAR_ENDONLY,
|
||||
"FIRSTLINE": pcre2.FIRSTLINE,
|
||||
"NO_AUTO_CAPTURE": pcre2.NO_AUTO_CAPTURE,
|
||||
# "JAVASCRIPT_COMPAT": pcre2.JAVASCRIPT_COMPAT,
|
||||
"U": pcre2.UTF or pcre2.UCP # TODO: UTF-8 ?
|
||||
}.toTable
|
||||
# TODO: maybe add JIT?
|
||||
|
||||
# Options that are supported inside regular expressions themselves
|
||||
const SkipOptions = [
|
||||
@@ -449,8 +443,8 @@ const SkipOptions = [
|
||||
"CR", "LF", "CRLF", "ANYCRLF", "ANY", "BSR_ANYCRLF", "BSR_UNICODE"
|
||||
]
|
||||
|
||||
proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
|
||||
result = ("", 0'u32)
|
||||
proc extractOptions(pattern: string): tuple[pattern: string, options: uint32, noJit: bool] =
|
||||
result = ("", 0'u32, false)
|
||||
|
||||
var optionStart = 0
|
||||
var equals = false
|
||||
@@ -470,8 +464,8 @@ proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
|
||||
result.pattern.add pattern[optionStart .. i]
|
||||
elif PcreOptions.hasKey name:
|
||||
result.options = result.options or PcreOptions[name]
|
||||
# elif name == "NO_STUDY":
|
||||
# result.study = false
|
||||
elif name == "NO_STUDY":
|
||||
result.noJit = true
|
||||
else:
|
||||
break
|
||||
optionStart = i+1
|
||||
@@ -488,8 +482,8 @@ proc extractOptions(pattern: string): tuple[pattern: string, options: uint32] =
|
||||
result.pattern.add pattern[optionStart .. pattern.high]
|
||||
|
||||
proc re*(pattern: string): Regex =
|
||||
let (pattern, options) = extractOptions(pattern)
|
||||
initRegex(pattern, pcre2.ZERO_TERMINATED, options)
|
||||
let (pattern, options, noJit) = extractOptions(pattern)
|
||||
initRegex(pattern, pcre2.ZERO_TERMINATED, options, noJit)
|
||||
|
||||
proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32): Option[RegexMatch] =
|
||||
var myResult = RegexMatch(pattern: pattern, str: str)
|
||||
@@ -517,12 +511,7 @@ proc matchImpl(str: string, pattern: Regex, start, endpos: int, options: uint32)
|
||||
let ovector = cast[ptr UncheckedArray[csize_t]](pcre2.get_ovector_pointer(matchData))
|
||||
let capture_count = pcre2.get_ovector_count(matchData)
|
||||
let ovector_size = 2 * capture_count.int * sizeof(csize_t)
|
||||
# echo (myResult.pcreMatchBounds.len * 2 * sizeof(csize_t), ovector_size)
|
||||
# echo (capture_count, ovector[0], ovector[1])
|
||||
copyMem(addr myResult.pcreMatchBounds[0], ovector, ovector_size)
|
||||
# echo (myResult.pcreMatchBounds[0].a, myResult.pcreMatchBounds[0].b)
|
||||
|
||||
# echo " -> ", myResult
|
||||
if execRet >= 0:
|
||||
return some(myResult)
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ runnableExamples:
|
||||
import
|
||||
std/[strutils, rtarrays]
|
||||
|
||||
import ../wrappers/pcre2
|
||||
import std/pcre2
|
||||
|
||||
when defined(nimPreviewSlimSystem):
|
||||
import std/syncio
|
||||
@@ -115,7 +115,7 @@ proc re*(s: string, flags = {reStudy}): Regex =
|
||||
options = options or CASELESS
|
||||
result.h = rawCompile(s, cast[csize_t](ZERO_TERMINATED), options)
|
||||
if reStudy in flags: # TODO: add reJit
|
||||
var hasJit: cint = 0
|
||||
var hasJit: cint = cint(0)
|
||||
if pcre2.config(pcre2.CONFIG_JIT, addr hasJit) == 0:
|
||||
if hasJit == 1'i32 and jit_compile(result.h, pcre2.JIT_COMPLETE) != 0:
|
||||
raiseInvalidRegex("JIT compilation failed.")
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import unittest
|
||||
include nre
|
||||
|
||||
from ../../../lib/wrappers/pcre2 import nil
|
||||
from std/pcre2 import nil
|
||||
|
||||
block: # Test NRE initialization
|
||||
block: # correct initialization
|
||||
@@ -10,23 +10,23 @@ block: # Test NRE initialization
|
||||
|
||||
block: # options
|
||||
check(extractOptions("(*NEVER_UTF)") ==
|
||||
("", pcre2.NEVER_UTF))
|
||||
("", pcre2.NEVER_UTF, false))
|
||||
check(extractOptions("(*UTF8)(*ANCHORED)(*UCP)z") ==
|
||||
("(*UTF8)(*UCP)z", pcre2.ANCHORED))
|
||||
("(*UTF8)(*UCP)z", pcre2.ANCHORED, false))
|
||||
# check(extractOptions("(*ANCHORED)(*UTF8)(*JAVASCRIPT_COMPAT)z") ==
|
||||
# ("(*UTF8)z", pcre2.ANCHORED or pcre2.JAVASCRIPT_COMPAT, true))
|
||||
|
||||
# check(extractOptions("(*NO_STUDY)(") == ("(", 0'u32))
|
||||
|
||||
check(extractOptions("(*LIMIT_MATCH=6)(*ANCHORED)z") ==
|
||||
("(*LIMIT_MATCH=6)z", pcre2.ANCHORED))
|
||||
("(*LIMIT_MATCH=6)z", pcre2.ANCHORED, false))
|
||||
|
||||
block: # incorrect options
|
||||
for s in ["CR", "(CR", "(*CR", "(*abc)", "(*abc)CR",
|
||||
"(?i)",
|
||||
"(*LIMIT_MATCH=5", "(*NO_AUTO_POSSESS=5)"]:
|
||||
let ss = s & "(*NEVER_UTF)"
|
||||
check(extractOptions(ss) == (ss, 0'u32))
|
||||
check(extractOptions(ss) == (ss, 0'u32, false))
|
||||
|
||||
block: # invalid regex
|
||||
# expect(SyntaxError): discard re("[0-9")
|
||||
|
||||
Reference in New Issue
Block a user