mirror of
https://github.com/nim-lang/Nim.git
synced 2025-12-31 18:32:11 +00:00
Merge pull request #8001 from kaushalmodi/fix-isupper-islower-try2
Make isUpper (and variants) work for strings with non-alpha chars
This commit is contained in:
@@ -42,6 +42,12 @@
|
||||
- ``math.`mod` `` for floats now behaves the same as ``mod`` for integers
|
||||
(previously it used floor division like Python). Use ``math.floorMod`` for the old behavior.
|
||||
|
||||
- For string inputs, ``unicode.isUpper`` and ``unicode.isLower`` now require a
|
||||
second mandatory parameter ``skipNonAlpha``.
|
||||
|
||||
- For string inputs, ``strutils.isUpperAscii`` and ``strutils.isLowerAscii`` now
|
||||
require a second mandatory parameter ``skipNonAlpha``.
|
||||
|
||||
#### Breaking changes in the compiler
|
||||
|
||||
- The undocumented ``#? braces`` parsing mode was removed.
|
||||
|
||||
@@ -150,23 +150,52 @@ proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar,
|
||||
## characters and there is at least one character in `s`.
|
||||
isImpl isSpaceAscii
|
||||
|
||||
proc isLowerAscii*(s: string): bool {.
    noSideEffect, procvar, rtl, extern: "nsuIsLowerAsciiStr".} =
  ## Reports whether the string `s` is made up entirely of lower case
  ## characters.
  ##
  ## Only ASCII characters are taken into account.
  ## The result is true when every character in `s` is lower case
  ## and `s` holds at least one character.
  isImpl(isLowerAscii)
|
||||
template isCaseImpl(s, charProc, skipNonAlpha) =
  ## Shared body for the string case checks. `charProc` is the per-character
  ## predicate applied to the characters of `s`.
  ##
  ## Must be expanded inside a routine returning `bool`, because it leaves
  ## through `return`.
  if len(s) == 0:
    return false
  var sawAlpha = false
  for ch in s:
    if not skipNonAlpha:
      # Strict mode: every single character has to satisfy `charProc`.
      if not charProc(ch):
        return false
    elif isAlphaAscii(ch):
      # Lenient mode: only alphabetical characters are tested, and we
      # remember that at least one of them was present.
      sawAlpha = true
      if not charProc(ch):
        return false
  # In lenient mode a string without any alphabetical character fails.
  return (not skipNonAlpha) or sawAlpha
|
||||
|
||||
proc isUpperAscii*(s: string): bool {.noSideEffect, procvar,
|
||||
rtl, extern: "nsuIsUpperAsciiStr".} =
|
||||
## Checks whether or not `s` contains all upper case characters.
|
||||
proc isLowerAscii*(s: string, skipNonAlpha: bool): bool =
|
||||
## Checks whether ``s`` is lower case.
|
||||
##
|
||||
## This checks ASCII characters only.
|
||||
## Returns true if all characters in `s` are upper case
|
||||
## and there is at least one character in `s`.
|
||||
isImpl isUpperAscii
|
||||
##
|
||||
## If ``skipNonAlpha`` is true, returns true if all alphabetical
|
||||
## characters in ``s`` are lower case. Returns false if none of the
|
||||
## characters in ``s`` are alphabetical.
|
||||
##
|
||||
## If ``skipNonAlpha`` is false, returns true only if all characters
|
||||
## in ``s`` are alphabetical and lower case.
|
||||
##
|
||||
## For either value of ``skipNonAlpha``, returns false if ``s`` is
|
||||
## an empty string.
|
||||
isCaseImpl(s, isLowerAscii, skipNonAlpha)
|
||||
|
||||
proc isUpperAscii*(s: string, skipNonAlpha: bool): bool =
  ## Tests whether the string ``s`` is upper case, considering ASCII
  ## characters only.
  ##
  ## * With ``skipNonAlpha`` set to true, non-alphabetical characters are
  ##   ignored: the result is true when every alphabetical character in
  ##   ``s`` is upper case, and false when ``s`` contains no alphabetical
  ##   character at all.
  ## * With ``skipNonAlpha`` set to false, the result is true only when
  ##   every character in ``s`` is alphabetical and upper case.
  ##
  ## An empty ``s`` yields false whatever the value of ``skipNonAlpha``.
  isCaseImpl(s, isUpperAscii, skipNonAlpha)
|
||||
|
||||
proc toLowerAscii*(c: char): char {.noSideEffect, procvar,
|
||||
rtl, extern: "nsuToLowerAsciiChar".} =
|
||||
@@ -2516,19 +2545,34 @@ when isMainModule:
|
||||
doAssert(not isLowerAscii('A'))
|
||||
doAssert(not isLowerAscii('5'))
|
||||
doAssert(not isLowerAscii('&'))
|
||||
doAssert(not isLowerAscii(' '))
|
||||
|
||||
doAssert isLowerAscii("abcd")
|
||||
doAssert(not isLowerAscii("abCD"))
|
||||
doAssert(not isLowerAscii("33aa"))
|
||||
doAssert isLowerAscii("abcd", false)
|
||||
doAssert(not isLowerAscii("33aa", false))
|
||||
doAssert(not isLowerAscii("a b", false))
|
||||
|
||||
doAssert(not isLowerAscii("abCD", true))
|
||||
doAssert isLowerAscii("33aa", true)
|
||||
doAssert isLowerAscii("a b", true)
|
||||
doAssert isLowerAscii("1, 2, 3 go!", true)
|
||||
doAssert(not isLowerAscii(" ", true))
|
||||
doAssert(not isLowerAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets
|
||||
|
||||
doAssert isUpperAscii('A')
|
||||
doAssert(not isUpperAscii('b'))
|
||||
doAssert(not isUpperAscii('5'))
|
||||
doAssert(not isUpperAscii('%'))
|
||||
|
||||
doAssert isUpperAscii("ABC")
|
||||
doAssert(not isUpperAscii("AAcc"))
|
||||
doAssert(not isUpperAscii("A#$"))
|
||||
doAssert isUpperAscii("ABC", false)
|
||||
doAssert(not isUpperAscii("A#$", false))
|
||||
doAssert(not isUpperAscii("A B", false))
|
||||
|
||||
doAssert(not isUpperAscii("AAcc", true))
|
||||
doAssert isUpperAscii("A#$", true)
|
||||
doAssert isUpperAscii("A B", true)
|
||||
doAssert isUpperAscii("1, 2, 3 GO!", true)
|
||||
doAssert(not isUpperAscii(" ", true))
|
||||
doAssert(not isUpperAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets
|
||||
|
||||
doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"]
|
||||
doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"]
|
||||
@@ -2601,4 +2645,3 @@ bar
|
||||
nonStaticTests()
|
||||
staticTests()
|
||||
static: staticTests()
|
||||
|
||||
|
||||
@@ -1392,7 +1392,7 @@ proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
|
||||
(c >= 0xfe20 and c <= 0xfe2f))
|
||||
|
||||
template runeCheck(s, runeProc) =
|
||||
## Common code for rune.isLower, rune.isUpper, etc
|
||||
## Common code for isAlpha and isSpace.
|
||||
result = if len(s) == 0: false else: true
|
||||
|
||||
var
|
||||
@@ -1403,16 +1403,6 @@ template runeCheck(s, runeProc) =
|
||||
fastRuneAt(s, i, rune, doInc=true)
|
||||
result = runeProc(rune) and result
|
||||
|
||||
proc isUpper*(s: string): bool {.
    noSideEffect, procvar, rtl, extern: "nuc$1Str".} =
  ## True if and only if `s` consists entirely of upper case unicode
  ## characters.
  runeCheck(s, isUpper)
|
||||
|
||||
proc isLower*(s: string): bool {.
    noSideEffect, procvar, rtl, extern: "nuc$1Str".} =
  ## True if and only if `s` consists entirely of lower case unicode
  ## characters.
  runeCheck(s, isLower)
|
||||
|
||||
proc isAlpha*(s: string): bool {.noSideEffect, procvar,
|
||||
rtl, extern: "nuc$1Str".} =
|
||||
## Returns true iff `s` contains all alphabetic unicode characters.
|
||||
@@ -1423,6 +1413,56 @@ proc isSpace*(s: string): bool {.noSideEffect, procvar,
|
||||
## Returns true iff `s` contains all whitespace unicode characters.
|
||||
runeCheck(s, isWhiteSpace)
|
||||
|
||||
template runeCaseCheck(s, runeProc, skipNonAlpha) =
  ## Shared body for the string versions of isLower and isUpper.
  ## `runeProc` is the per-rune predicate applied to the runes decoded
  ## from `s`.
  ##
  ## Must be expanded inside a routine returning `bool`, because it leaves
  ## through `return`.
  if s.len == 0:
    return false

  var
    pos = 0
    current: Rune
    sawAlpha = false

  while pos < s.len:
    # Decodes the rune starting at `pos` and advances `pos` past it.
    fastRuneAt(s, pos, current, doInc=true)
    if not skipNonAlpha:
      # Strict mode: every rune has to satisfy `runeProc`.
      if not runeProc(current):
        return false
    elif isAlpha(current):
      # Lenient mode: only alphabetical runes are tested, and we remember
      # that at least one of them was present.
      sawAlpha = true
      if not runeProc(current):
        return false
  # In lenient mode a string without any alphabetical rune fails.
  return (not skipNonAlpha) or sawAlpha
|
||||
|
||||
proc isLower*(s: string, skipNonAlpha: bool): bool =
  ## Tests whether the string ``s`` is lower case.
  ##
  ## * With ``skipNonAlpha`` set to true, non-alphabetical runes are
  ##   ignored: the result is true when every alphabetical rune in ``s``
  ##   is lower case, and false when ``s`` contains no alphabetical rune
  ##   at all.
  ## * With ``skipNonAlpha`` set to false, the result is true only when
  ##   every rune in ``s`` is alphabetical and lower case.
  ##
  ## An empty ``s`` yields false whatever the value of ``skipNonAlpha``.
  runeCaseCheck(s, isLower, skipNonAlpha)
|
||||
|
||||
proc isUpper*(s: string, skipNonAlpha: bool): bool =
  ## Tests whether the string ``s`` is upper case.
  ##
  ## * With ``skipNonAlpha`` set to true, non-alphabetical runes are
  ##   ignored: the result is true when every alphabetical rune in ``s``
  ##   is upper case, and false when ``s`` contains no alphabetical rune
  ##   at all.
  ## * With ``skipNonAlpha`` set to false, the result is true only when
  ##   every rune in ``s`` is alphabetical and upper case.
  ##
  ## An empty ``s`` yields false whatever the value of ``skipNonAlpha``.
  runeCaseCheck(s, isUpper, skipNonAlpha)
|
||||
|
||||
template convertRune(s, runeProc) =
|
||||
## Convert runes in `s` using `runeProc` as the converter.
|
||||
result = newString(len(s))
|
||||
@@ -1755,25 +1795,39 @@ when isMainModule:
|
||||
doAssert(not isSpace(""))
|
||||
doAssert(not isSpace("ΑΓc \td"))
|
||||
|
||||
doAssert isLower("a")
|
||||
doAssert isLower("γ")
|
||||
doAssert(not isLower("Γ"))
|
||||
doAssert(not isLower("4"))
|
||||
doAssert(not isLower(""))
|
||||
doAssert(not isLower(' '.Rune))
|
||||
|
||||
doAssert isLower("abcdγ")
|
||||
doAssert(not isLower("abCDΓ"))
|
||||
doAssert(not isLower("33aaΓ"))
|
||||
doAssert isLower("a", false)
|
||||
doAssert isLower("γ", true)
|
||||
doAssert(not isLower("Γ", false))
|
||||
doAssert(not isLower("4", true))
|
||||
doAssert(not isLower("", false))
|
||||
doAssert isLower("abcdγ", false)
|
||||
doAssert(not isLower("33aaΓ", false))
|
||||
doAssert(not isLower("a b", false))
|
||||
|
||||
doAssert isUpper("Γ")
|
||||
doAssert(not isUpper("b"))
|
||||
doAssert(not isUpper("α"))
|
||||
doAssert(not isUpper("✓"))
|
||||
doAssert(not isUpper(""))
|
||||
doAssert(not isLower("abCDΓ", true))
|
||||
doAssert isLower("a b", true)
|
||||
doAssert isLower("1, 2, 3 go!", true)
|
||||
doAssert(not isLower(" ", true))
|
||||
doAssert(not isLower("(*&#@(^#$✓ ", true)) # None of the string runes are alphabets
|
||||
|
||||
doAssert isUpper("ΑΒΓ")
|
||||
doAssert(not isUpper("AAccβ"))
|
||||
doAssert(not isUpper("A#$β"))
|
||||
doAssert(not isUpper(' '.Rune))
|
||||
|
||||
doAssert isUpper("Γ", false)
|
||||
doAssert(not isUpper("α", false))
|
||||
doAssert(not isUpper("", false))
|
||||
doAssert isUpper("ΑΒΓ", false)
|
||||
doAssert(not isUpper("A#$β", false))
|
||||
doAssert(not isUpper("A B", false))
|
||||
|
||||
doAssert(not isUpper("b", true))
|
||||
doAssert(not isUpper("✓", true))
|
||||
doAssert(not isUpper("AAccβ", true))
|
||||
doAssert isUpper("A B", true)
|
||||
doAssert isUpper("1, 2, 3 GO!", true)
|
||||
doAssert(not isUpper(" ", true))
|
||||
doAssert(not isUpper("(*&#@(^#$✓ ", true)) # None of the string runes are alphabets
|
||||
|
||||
doAssert toUpper("Γ") == "Γ"
|
||||
doAssert toUpper("b") == "B"
|
||||
|
||||
Reference in New Issue
Block a user