mirror of
https://github.com/nim-lang/Nim.git
synced 2026-06-08 04:44:20 +00:00
[feature] stdlib: strutils.multiReplace for character sets (#24805)
Adds a `multiReplace` overload that performs multiple replacements based on
character sets in a single pass. Useful for string sanitization. Follows the
existing `multiReplace` semantics.
Note: the first draft copied the substring version's logic, using a `while`
loop and a named block `break`, but Godbolt showed that it produced slightly
larger assembly, using higher registers, than the final version.
- [x] Tests
- [x] changelog.md
(cherry picked from commit 909f3b8b79)
This commit is contained in:
@@ -25,6 +25,8 @@ errors.
|
||||
- `setutils.symmetricDifference` along with its operator version
|
||||
`` setutils.`-+-` `` and in-place version `setutils.toggle` have been added
|
||||
to more efficiently calculate the symmetric difference of bitsets.
|
||||
- `strutils.multiReplace` overload for character set replacements in a single pass.
|
||||
Useful for string sanitation. Follows existing `multiReplace` semantics.
|
||||
|
||||
[//]: # "Changes:"
|
||||
- `std/math` The `^` symbol now supports floating-point as exponent in addition to the Natural type.
|
||||
|
||||
@@ -2202,7 +2202,8 @@ func replace*(s, sub: string, by = ""): string {.rtl,
|
||||
## * `replace func<#replace,string,char,char>`_ for replacing
|
||||
## single characters
|
||||
## * `replaceWord func<#replaceWord,string,string,string>`_
|
||||
## * `multiReplace func<#multiReplace,string,varargs[]>`_
|
||||
## * `multiReplace func<#multiReplace,string,varargs[]>`_ for substrings
|
||||
## * `multiReplace func<#multiReplace,openArray[char],varargs[]>`_ for single characters
|
||||
result = ""
|
||||
let subLen = sub.len
|
||||
if subLen == 0:
|
||||
@@ -2245,7 +2246,8 @@ func replace*(s: string, sub, by: char): string {.rtl,
|
||||
## See also:
|
||||
## * `find func<#find,string,char,Natural,int>`_
|
||||
## * `replaceWord func<#replaceWord,string,string,string>`_
|
||||
## * `multiReplace func<#multiReplace,string,varargs[]>`_
|
||||
## * `multiReplace func<#multiReplace,string,varargs[]>`_ for substrings
|
||||
## * `multiReplace func<#multiReplace,openArray[char],varargs[]>`_ for single characters
|
||||
result = newString(s.len)
|
||||
var i = 0
|
||||
while i < s.len:
|
||||
@@ -2330,7 +2332,39 @@ func multiReplace*(s: string, replacements: varargs[(string, string)]): string =
|
||||
add result, s[i]
|
||||
inc(i)
|
||||
|
||||
|
||||
func multiReplace*(s: openArray[char]; replacements: varargs[(set[char], char)]): string {.noinit.} =
  ## Performs multiple character replacements in a single pass through the input.
  ##
  ## `multiReplace` scans the input `s` from left to right and replaces
  ## characters based on character sets, applying the first matching replacement
  ## at each position. Useful for sanitizing or transforming strings with
  ## predefined character mappings.
  ##
  ## The result always has the same length as `s`; a character that is not
  ## contained in any of the sets is copied unchanged.
  ##
  ## The order of the `replacements` matters:
  ## - First matching replacement is applied
  ## - Subsequent replacements are not considered for the same character
  ##
  ## See also:
  ## * `multiReplace func<#multiReplace,string,varargs[]>`_ for substrings
  runnableExamples:
    const WinSanitationRules = [
      ({'\0'..'\31'}, ' '),
      ({'"'}, '\''),
      ({'/', '\\', ':', '|'}, '-'),
      ({'*', '?', '<', '>'}, '_'),
    ]
    # Sanitize a filename with Windows-incompatible characters
    const file = "a/file:with?invalid*chars.txt"
    doAssert file.multiReplace(WinSanitationRules) == "a-file-with_invalid_chars.txt"
  # The result buffer is allocated without zero-filling; this is safe because
  # the loop below writes every index in `0 ..< s.len` exactly once.
  {.cast(noSideEffect).}:
    result = newStringUninit(s.len)
  for i in 0..<s.len:
    var nextChar = s[i]
    for subs, by in replacements.items:
      if nextChar in subs:
        # First matching rule wins; later rules are skipped for this character.
        nextChar = by
        break
    result[i] = nextChar
|
||||
|
||||
func insertSep*(s: string, sep = '_', digits = 3): string {.rtl,
|
||||
extern: "nsuInsertSep".} =
|
||||
|
||||
@@ -575,12 +575,29 @@ template main() =
|
||||
doAssert "-lda-ldz -ld abc".replaceWord("-ld") == "-lda-ldz abc"
|
||||
doAssert "-lda-ldz -ld abc".replaceWord("") == "-lda-ldz -ld abc"
|
||||
|
||||
block: # multiReplace
|
||||
block: # multiReplace substrings
|
||||
doAssert "abba".multiReplace(("a", "b"), ("b", "a")) == "baab"
|
||||
doAssert "Hello World.".multiReplace(("ello", "ELLO"), ("World.",
|
||||
"PEOPLE!")) == "HELLO PEOPLE!"
|
||||
doAssert "aaaa".multiReplace(("a", "aa"), ("aa", "bb")) == "aaaaaaaa"
|
||||
|
||||
block: # multiReplace characters
|
||||
# https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#naming-conventions
|
||||
const SanitationRules = [
|
||||
({'\0'..'\31'}, ' '),
|
||||
({'"'}, '\''),
|
||||
({'/', '\\', ':', '|'}, '-'),
|
||||
({'*', '?', '<', '>'}, '_'),
|
||||
]
|
||||
# Basic character set replacements
|
||||
doAssert multiReplace("abba", SanitationRules) == "abba"
|
||||
doAssert multiReplace("a/b\\c:d", SanitationRules) == "a-b-c-d"
|
||||
doAssert multiReplace("a*b?c", SanitationRules) == "a_b_c"
|
||||
doAssert multiReplace("\0\3test", SanitationRules) == " test"
|
||||
doAssert multiReplace("testquote\"", SanitationRules) == "testquote'"
|
||||
doAssert multiReplace("", SanitationRules) == ""
|
||||
doAssert multiReplace("/\\:*?\"\0<>", ({'\0'..'\255'}, '.')) == "........."
|
||||
|
||||
# `parseEnum`, ref issue #14030
|
||||
# check enum defined at top level # xxx this is probably irrelevant, and pollutes scope
|
||||
# for remaining tests
|
||||
|
||||
Reference in New Issue
Block a user