Merge pull request #8001 from kaushalmodi/fix-isupper-islower-try2

Make isUpper (and variants) work for strings with non-alpha chars
This commit is contained in:
Andreas Rumpf
2018-06-09 08:58:08 +02:00
committed by GitHub
3 changed files with 151 additions and 48 deletions

View File

@@ -42,6 +42,12 @@
- ``math.`mod` `` for floats now behaves the same as ``mod`` for integers
(previously it used floor division like Python). Use ``math.floorMod`` for the old behavior.
- For string inputs, ``unicode.isUpper`` and ``unicode.isLower`` now require a
second mandatory parameter ``skipNonAlpha``.
- For string inputs, ``strutils.isUpperAscii`` and ``strutils.isLowerAscii`` now
require a second mandatory parameter ``skipNonAlpha``.
#### Breaking changes in the compiler
- The undocumented ``#? braces`` parsing mode was removed.

View File

@@ -150,23 +150,52 @@ proc isSpaceAscii*(s: string): bool {.noSideEffect, procvar,
## characters and there is at least one character in `s`.
isImpl isSpaceAscii
proc isLowerAscii*(s: string): bool {.noSideEffect, procvar,
                                      rtl, extern: "nsuIsLowerAsciiStr".} =
  ## Reports whether `s` is made up entirely of lower case characters.
  ##
  ## Only ASCII characters are taken into account.
  ## The result is true when every character in `s` is lower case
  ## and `s` holds at least one character.
  isImpl(isLowerAscii)
template isCaseImpl(s, charProc, skipNonAlpha) =
  ## Shared body of the string case predicates; must be expanded inside a
  ## proc returning `bool` because it exits through `return`.
  ##
  ## `charProc` is the per-character predicate applied to the characters
  ## of `s`. When `skipNonAlpha` is true only ASCII letters are tested and
  ## at least one letter must be present; otherwise every character must
  ## satisfy `charProc`. An empty `s` always yields false.
  if len(s) == 0:
    return false
  var sawAlpha = false
  for ch in s:
    if not skipNonAlpha:
      # Strict mode: every single character has to pass.
      if not charProc(ch):
        return false
    elif isAlphaAscii(ch):
      # Lenient mode: only letters are examined; remember that one was seen.
      sawAlpha = true
      if not charProc(ch):
        return false
  # Lenient mode additionally demands that some letter was encountered.
  return (not skipNonAlpha) or sawAlpha
proc isUpperAscii*(s: string): bool {.noSideEffect, procvar,
                                      rtl, extern: "nsuIsUpperAsciiStr".} =
  ## Checks whether or not `s` contains all upper case characters.
  ##
  ## This checks ASCII characters only.
  ## Returns true if all characters in `s` are upper case
  ## and there is at least one character in `s`.
  isImpl isUpperAscii

proc isLowerAscii*(s: string, skipNonAlpha: bool): bool {.noSideEffect.} =
  ## Checks whether ``s`` is lower case.
  ##
  ## This checks ASCII characters only.
  ##
  ## If ``skipNonAlpha`` is true, returns true if all alphabetical
  ## characters in ``s`` are lower case. Returns false if none of the
  ## characters in ``s`` are alphabetical.
  ##
  ## If ``skipNonAlpha`` is false, returns true only if all characters
  ## in ``s`` are alphabetical and lower case.
  ##
  ## For either value of ``skipNonAlpha``, returns false if ``s`` is
  ## an empty string.
  # `noSideEffect` keeps this overload usable from side-effect-free code,
  # in line with the other string predicates in this module.
  isCaseImpl(s, isLowerAscii, skipNonAlpha)
proc isUpperAscii*(s: string, skipNonAlpha: bool): bool {.noSideEffect.} =
  ## Checks whether ``s`` is upper case.
  ##
  ## Only ASCII characters are considered.
  ##
  ## With ``skipNonAlpha`` set to true, non-alphabetical characters are
  ## ignored: the result is true if every alphabetical character in ``s``
  ## is upper case, and false if ``s`` has no alphabetical character.
  ##
  ## With ``skipNonAlpha`` set to false, the result is true only if every
  ## character in ``s`` is alphabetical and upper case.
  ##
  ## An empty ``s`` yields false for either value of ``skipNonAlpha``.
  # `noSideEffect` keeps this overload usable from side-effect-free code,
  # in line with the other string predicates in this module.
  isCaseImpl(s, isUpperAscii, skipNonAlpha)
proc toLowerAscii*(c: char): char {.noSideEffect, procvar,
rtl, extern: "nsuToLowerAsciiChar".} =
@@ -2516,19 +2545,34 @@ when isMainModule:
doAssert(not isLowerAscii('A'))
doAssert(not isLowerAscii('5'))
doAssert(not isLowerAscii('&'))
doAssert(not isLowerAscii(' '))
doAssert isLowerAscii("abcd")
doAssert(not isLowerAscii("abCD"))
doAssert(not isLowerAscii("33aa"))
doAssert isLowerAscii("abcd", false)
doAssert(not isLowerAscii("33aa", false))
doAssert(not isLowerAscii("a b", false))
doAssert(not isLowerAscii("abCD", true))
doAssert isLowerAscii("33aa", true)
doAssert isLowerAscii("a b", true)
doAssert isLowerAscii("1, 2, 3 go!", true)
doAssert(not isLowerAscii(" ", true))
doAssert(not isLowerAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets
doAssert isUpperAscii('A')
doAssert(not isUpperAscii('b'))
doAssert(not isUpperAscii('5'))
doAssert(not isUpperAscii('%'))
doAssert isUpperAscii("ABC")
doAssert(not isUpperAscii("AAcc"))
doAssert(not isUpperAscii("A#$"))
doAssert isUpperAscii("ABC", false)
doAssert(not isUpperAscii("A#$", false))
doAssert(not isUpperAscii("A B", false))
doAssert(not isUpperAscii("AAcc", true))
doAssert isUpperAscii("A#$", true)
doAssert isUpperAscii("A B", true)
doAssert isUpperAscii("1, 2, 3 GO!", true)
doAssert(not isUpperAscii(" ", true))
doAssert(not isUpperAscii("(*&#@(^#$ ", true)) # None of the string chars are alphabets
doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"]
doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"]
@@ -2601,4 +2645,3 @@ bar
nonStaticTests()
staticTests()
static: staticTests()

View File

@@ -1392,7 +1392,7 @@ proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1", procvar.} =
(c >= 0xfe20 and c <= 0xfe2f))
template runeCheck(s, runeProc) =
## Common code for rune.isLower, rune.isUpper, etc
## Common code for isAlpha and isSpace.
result = if len(s) == 0: false else: true
var
@@ -1403,16 +1403,6 @@ template runeCheck(s, runeProc) =
fastRuneAt(s, i, rune, doInc=true)
result = runeProc(rune) and result
proc isUpper*(s: string): bool {.noSideEffect, procvar,
                                 rtl, extern: "nuc$1Str".} =
  ## Returns true iff every unicode character (rune) of `s` is upper case.
  runeCheck(s, isUpper)
proc isLower*(s: string): bool {.noSideEffect, procvar,
                                 rtl, extern: "nuc$1Str".} =
  ## Returns true iff every unicode character (rune) of `s` is lower case.
  runeCheck(s, isLower)
proc isAlpha*(s: string): bool {.noSideEffect, procvar,
rtl, extern: "nuc$1Str".} =
## Returns true iff `s` contains all alphabetic unicode characters.
@@ -1423,6 +1413,56 @@ proc isSpace*(s: string): bool {.noSideEffect, procvar,
## Returns true iff `s` contains all whitespace unicode characters.
runeCheck(s, isWhiteSpace)
template runeCaseCheck(s, runeProc, skipNonAlpha) =
  ## Shared body of the string overloads of `isLower` and `isUpper`; must
  ## be expanded inside a proc returning `bool` because it exits through
  ## `return`.
  ##
  ## `runeProc` is the per-rune predicate. When `skipNonAlpha` is true only
  ## alphabetical runes are tested and at least one must be present;
  ## otherwise every rune must satisfy `runeProc`. An empty `s` always
  ## yields false.
  if s.len == 0:
    return false
  var
    pos = 0
    current: Rune
    sawAlpha = false
  while pos < s.len:
    # Decode the rune starting at `pos` and advance `pos` past it.
    fastRuneAt(s, pos, current, doInc=true)
    if not skipNonAlpha:
      # Strict mode: every single rune has to pass.
      if not runeProc(current):
        return false
    elif isAlpha(current):
      # Lenient mode: only alphabetical runes are examined.
      sawAlpha = true
      if not runeProc(current):
        return false
  # Lenient mode additionally demands that some alphabetical rune was seen.
  return (not skipNonAlpha) or sawAlpha
proc isLower*(s: string, skipNonAlpha: bool): bool {.noSideEffect.} =
  ## Checks whether ``s`` is lower case.
  ##
  ## With ``skipNonAlpha`` set to true, non-alphabetical runes are
  ## ignored: the result is true if every alphabetical rune in ``s`` is
  ## lower case, and false if ``s`` has no alphabetical rune.
  ##
  ## With ``skipNonAlpha`` set to false, the result is true only if every
  ## rune in ``s`` is alphabetical and lower case.
  ##
  ## An empty ``s`` yields false for either value of ``skipNonAlpha``.
  # `noSideEffect` keeps this overload usable from side-effect-free code,
  # in line with the other string predicates in this module.
  runeCaseCheck(s, isLower, skipNonAlpha)
proc isUpper*(s: string, skipNonAlpha: bool): bool {.noSideEffect.} =
  ## Checks whether ``s`` is upper case.
  ##
  ## With ``skipNonAlpha`` set to true, non-alphabetical runes are
  ## ignored: the result is true if every alphabetical rune in ``s`` is
  ## upper case, and false if ``s`` has no alphabetical rune.
  ##
  ## With ``skipNonAlpha`` set to false, the result is true only if every
  ## rune in ``s`` is alphabetical and upper case.
  ##
  ## An empty ``s`` yields false for either value of ``skipNonAlpha``.
  # `noSideEffect` keeps this overload usable from side-effect-free code,
  # in line with the other string predicates in this module.
  runeCaseCheck(s, isUpper, skipNonAlpha)
template convertRune(s, runeProc) =
## Convert runes in `s` using `runeProc` as the converter.
result = newString(len(s))
@@ -1755,25 +1795,39 @@ when isMainModule:
doAssert(not isSpace(""))
doAssert(not isSpace("ΑΓc \td"))
doAssert isLower("a")
doAssert isLower("γ")
doAssert(not isLower("Γ"))
doAssert(not isLower("4"))
doAssert(not isLower(""))
doAssert(not isLower(' '.Rune))
doAssert isLower("abcdγ")
doAssert(not isLower("abCDΓ"))
doAssert(not isLower("33aaΓ"))
doAssert isLower("a", false)
doAssert isLower("γ", true)
doAssert(not isLower("Γ", false))
doAssert(not isLower("4", true))
doAssert(not isLower("", false))
doAssert isLower("abcdγ", false)
doAssert(not isLower("33aaΓ", false))
doAssert(not isLower("a b", false))
doAssert isUpper("Γ")
doAssert(not isUpper("b"))
doAssert(not isUpper("α"))
doAssert(not isUpper(""))
doAssert(not isUpper(""))
doAssert(not isLower("abCDΓ", true))
doAssert isLower("a b", true)
doAssert isLower("1, 2, 3 go!", true)
doAssert(not isLower(" ", true))
doAssert(not isLower("(*&#@(^#$", true)) # None of the string runes are alphabets
doAssert isUpper("ΑΒΓ")
doAssert(not isUpper("AAccβ"))
doAssert(not isUpper("A#"))
doAssert(not isUpper(' '.Rune))
doAssert isUpper("Γ", false)
doAssert(not isUpper("α", false))
doAssert(not isUpper("", false))
doAssert isUpper("ΑΒΓ", false)
doAssert(not isUpper("A#", false))
doAssert(not isUpper("A B", false))
doAssert(not isUpper("b", true))
doAssert(not isUpper("", true))
doAssert(not isUpper("AAccβ", true))
doAssert isUpper("A B", true)
doAssert isUpper("1, 2, 3 GO!", true)
doAssert(not isUpper(" ", true))
doAssert(not isUpper("(*&#@(^#$", true)) # None of the string runes are alphabets
doAssert toUpper("Γ") == "Γ"
doAssert toUpper("b") == "B"