[backport] run nimpretty on string stuff

This commit is contained in:
narimiran
2019-09-27 11:20:18 +02:00
parent 0ca9cc7419
commit dcf3181bd1
11 changed files with 421 additions and 371 deletions

View File

@@ -34,66 +34,68 @@ when defined(windows):
while i < a.len and j < b.len:
if a[i] in {'-', '_'}: inc i
if b[j] in {'-', '_'}: inc j
if i < a.len and j < b.len and a[i].toLowerAscii != b[j].toLowerAscii: return false
if i < a.len and j < b.len and
a[i].toLowerAscii != b[j].toLowerAscii:
return false
inc i
inc j
result = i == a.len and j == b.len
const
winEncodings = [
(1, "OEMCP"), # current OEM codepage
(037, "IBM037"), # IBM EBCDIC US-Canada
(437, "IBM437"), # OEM United States
(500, "IBM500"), # IBM EBCDIC International
(708, "ASMO-708"), # Arabic (ASMO 708)
(709, "ASMO_449"), # Arabic (ASMO-449+, BCON V4)
(710, ""), # Arabic - Transparent Arabic
(720, "DOS-720"), # Arabic (Transparent ASMO); Arabic (DOS)
(737, "ibm737"), # OEM Greek (formerly 437G); Greek (DOS)
(775, "ibm775"), # OEM Baltic; Baltic (DOS)
(850, "ibm850"), # OEM Multilingual Latin 1; Western European (DOS)
(852, "ibm852"), # OEM Latin 2; Central European (DOS)
(855, "IBM855"), # OEM Cyrillic (primarily Russian)
(857, "ibm857"), # OEM Turkish; Turkish (DOS)
(858, "IBM00858"), # OEM Multilingual Latin 1 + Euro symbol
(860, "IBM860"), # OEM Portuguese; Portuguese (DOS)
(861, "ibm861"), # OEM Icelandic; Icelandic (DOS)
(862, "DOS-862"), # OEM Hebrew; Hebrew (DOS)
(863, "IBM863"), # OEM French Canadian; French Canadian (DOS)
(864, "IBM864"), # OEM Arabic; Arabic (864)
(865, "IBM865"), # OEM Nordic; Nordic (DOS)
(866, "cp866"), # OEM Russian; Cyrillic (DOS)
(869, "ibm869"), # OEM Modern Greek; Greek, Modern (DOS)
(870, "IBM870"), # IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
(874, "windows-874"), # ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows)
(875, "cp875"), # IBM EBCDIC Greek Modern
(932, "shift_jis"), # ANSI/OEM Japanese; Japanese (Shift-JIS)
(936, "gb2312"), # ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
(1, "OEMCP"), # current OEM codepage
(037, "IBM037"), # IBM EBCDIC US-Canada
(437, "IBM437"), # OEM United States
(500, "IBM500"), # IBM EBCDIC International
(708, "ASMO-708"), # Arabic (ASMO 708)
(709, "ASMO_449"), # Arabic (ASMO-449+, BCON V4)
(710, ""), # Arabic - Transparent Arabic
(720, "DOS-720"), # Arabic (Transparent ASMO); Arabic (DOS)
(737, "ibm737"), # OEM Greek (formerly 437G); Greek (DOS)
(775, "ibm775"), # OEM Baltic; Baltic (DOS)
(850, "ibm850"), # OEM Multilingual Latin 1; Western European (DOS)
(852, "ibm852"), # OEM Latin 2; Central European (DOS)
(855, "IBM855"), # OEM Cyrillic (primarily Russian)
(857, "ibm857"), # OEM Turkish; Turkish (DOS)
(858, "IBM00858"), # OEM Multilingual Latin 1 + Euro symbol
(860, "IBM860"), # OEM Portuguese; Portuguese (DOS)
(861, "ibm861"), # OEM Icelandic; Icelandic (DOS)
(862, "DOS-862"), # OEM Hebrew; Hebrew (DOS)
(863, "IBM863"), # OEM French Canadian; French Canadian (DOS)
(864, "IBM864"), # OEM Arabic; Arabic (864)
(865, "IBM865"), # OEM Nordic; Nordic (DOS)
(866, "cp866"), # OEM Russian; Cyrillic (DOS)
(869, "ibm869"), # OEM Modern Greek; Greek, Modern (DOS)
(870, "IBM870"), # IBM EBCDIC Multilingual/ROECE (Latin 2); IBM EBCDIC Multilingual Latin 2
(874, "windows-874"), # ANSI/OEM Thai (same as 28605, ISO 8859-15); Thai (Windows)
(875, "cp875"), # IBM EBCDIC Greek Modern
(932, "shift_jis"), # ANSI/OEM Japanese; Japanese (Shift-JIS)
(936, "gb2312"), # ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
(949, "ks_c_5601-1987"), # ANSI/OEM Korean (Unified Hangul Code)
(950, "big5"), # ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
(1026, "IBM1026"), # IBM EBCDIC Turkish (Latin 5)
(1047, "IBM01047"), # IBM EBCDIC Latin 1/Open System
(1140, "IBM01140"), # IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
(1141, "IBM01141"), # IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
(1142, "IBM01142"), # IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
(1143, "IBM01143"), # IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
(1144, "IBM01144"), # IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
(1145, "IBM01145"), # IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
(1146, "IBM01146"), # IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
(1147, "IBM01147"), # IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
(1148, "IBM01148"), # IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
(1149, "IBM01149"), # IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
(1200, "utf-16"), # Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
(1201, "unicodeFFFE"), # Unicode UTF-16, big endian byte order; available only to managed applications
(1250, "windows-1250"), # ANSI Central European; Central European (Windows)
(1251, "windows-1251"), # ANSI Cyrillic; Cyrillic (Windows)
(1252, "windows-1252"), # ANSI Latin 1; Western European (Windows)
(1253, "windows-1253"), # ANSI Greek; Greek (Windows)
(1254, "windows-1254"), # ANSI Turkish; Turkish (Windows)
(1255, "windows-1255"), # ANSI Hebrew; Hebrew (Windows)
(1256, "windows-1256"), # ANSI Arabic; Arabic (Windows)
(1257, "windows-1257"), # ANSI Baltic; Baltic (Windows)
(1258, "windows-1258"), # ANSI/OEM Vietnamese; Vietnamese (Windows)
(950, "big5"), # ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
(1026, "IBM1026"), # IBM EBCDIC Turkish (Latin 5)
(1047, "IBM01047"), # IBM EBCDIC Latin 1/Open System
(1140, "IBM01140"), # IBM EBCDIC US-Canada (037 + Euro symbol); IBM EBCDIC (US-Canada-Euro)
(1141, "IBM01141"), # IBM EBCDIC Germany (20273 + Euro symbol); IBM EBCDIC (Germany-Euro)
(1142, "IBM01142"), # IBM EBCDIC Denmark-Norway (20277 + Euro symbol); IBM EBCDIC (Denmark-Norway-Euro)
(1143, "IBM01143"), # IBM EBCDIC Finland-Sweden (20278 + Euro symbol); IBM EBCDIC (Finland-Sweden-Euro)
(1144, "IBM01144"), # IBM EBCDIC Italy (20280 + Euro symbol); IBM EBCDIC (Italy-Euro)
(1145, "IBM01145"), # IBM EBCDIC Latin America-Spain (20284 + Euro symbol); IBM EBCDIC (Spain-Euro)
(1146, "IBM01146"), # IBM EBCDIC United Kingdom (20285 + Euro symbol); IBM EBCDIC (UK-Euro)
(1147, "IBM01147"), # IBM EBCDIC France (20297 + Euro symbol); IBM EBCDIC (France-Euro)
(1148, "IBM01148"), # IBM EBCDIC International (500 + Euro symbol); IBM EBCDIC (International-Euro)
(1149, "IBM01149"), # IBM EBCDIC Icelandic (20871 + Euro symbol); IBM EBCDIC (Icelandic-Euro)
(1200, "utf-16"), # Unicode UTF-16, little endian byte order (BMP of ISO 10646); available only to managed applications
(1201, "unicodeFFFE"), # Unicode UTF-16, big endian byte order; available only to managed applications
(1250, "windows-1250"), # ANSI Central European; Central European (Windows)
(1251, "windows-1251"), # ANSI Cyrillic; Cyrillic (Windows)
(1252, "windows-1252"), # ANSI Latin 1; Western European (Windows)
(1253, "windows-1253"), # ANSI Greek; Greek (Windows)
(1254, "windows-1254"), # ANSI Turkish; Turkish (Windows)
(1255, "windows-1255"), # ANSI Hebrew; Hebrew (Windows)
(1256, "windows-1256"), # ANSI Arabic; Arabic (Windows)
(1257, "windows-1257"), # ANSI Baltic; Baltic (Windows)
(1258, "windows-1258"), # ANSI/OEM Vietnamese; Vietnamese (Windows)
(1250, "cp-1250"), # ANSI Central European; Central European (Windows)
(1251, "cp-1251"), # ANSI Cyrillic; Cyrillic (Windows)
@@ -105,106 +107,106 @@ when defined(windows):
(1257, "cp-1257"), # ANSI Baltic; Baltic (Windows)
(1258, "cp-1258"), # ANSI/OEM Vietnamese; Vietnamese (Windows)
(1361, "Johab"), # Korean (Johab)
(10000, "macintosh"), # MAC Roman; Western European (Mac)
(10001, "x-mac-japanese"), # Japanese (Mac)
(10002, "x-mac-chinesetrad"), # MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
(10003, "x-mac-korean"), # Korean (Mac)
(10004, "x-mac-arabic"), # Arabic (Mac)
(10005, "x-mac-hebrew"), # Hebrew (Mac)
(10006, "x-mac-greek"), # Greek (Mac)
(10007, "x-mac-cyrillic"), # Cyrillic (Mac)
(10008, "x-mac-chinesesimp"), # MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
(10010, "x-mac-romanian"), # Romanian (Mac)
(10017, "x-mac-ukrainian"), # Ukrainian (Mac)
(10021, "x-mac-thai"), # Thai (Mac)
(10029, "x-mac-ce"), # MAC Latin 2; Central European (Mac)
(10079, "x-mac-icelandic"), # Icelandic (Mac)
(10081, "x-mac-turkish"), # Turkish (Mac)
(10082, "x-mac-croatian"), # Croatian (Mac)
(12000, "utf-32"), # Unicode UTF-32, little endian byte order; available only to managed applications
(12001, "utf-32BE"), # Unicode UTF-32, big endian byte order; available only to managed applications
(20000, "x-Chinese_CNS"), # CNS Taiwan; Chinese Traditional (CNS)
(20001, "x-cp20001"), # TCA Taiwan
(20002, "x_Chinese-Eten"), # Eten Taiwan; Chinese Traditional (Eten)
(20003, "x-cp20003"), # IBM5550 Taiwan
(20004, "x-cp20004"), # TeleText Taiwan
(20005, "x-cp20005"), # Wang Taiwan
(20105, "x-IA5"), # IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
(20106, "x-IA5-German"), # IA5 German (7-bit)
(20107, "x-IA5-Swedish"), # IA5 Swedish (7-bit)
(20108, "x-IA5-Norwegian"), # IA5 Norwegian (7-bit)
(20127, "us-ascii"), # US-ASCII (7-bit)
(20261, "x-cp20261"), # T.61
(20269, "x-cp20269"), # ISO 6937 Non-Spacing Accent
(20273, "IBM273"), # IBM EBCDIC Germany
(20277, "IBM277"), # IBM EBCDIC Denmark-Norway
(20278, "IBM278"), # IBM EBCDIC Finland-Sweden
(20280, "IBM280"), # IBM EBCDIC Italy
(20284, "IBM284"), # IBM EBCDIC Latin America-Spain
(20285, "IBM285"), # IBM EBCDIC United Kingdom
(20290, "IBM290"), # IBM EBCDIC Japanese Katakana Extended
(20297, "IBM297"), # IBM EBCDIC France
(20420, "IBM420"), # IBM EBCDIC Arabic
(20423, "IBM423"), # IBM EBCDIC Greek
(20424, "IBM424"), # IBM EBCDIC Hebrew
(1361, "Johab"), # Korean (Johab)
(10000, "macintosh"), # MAC Roman; Western European (Mac)
(10001, "x-mac-japanese"), # Japanese (Mac)
(10002, "x-mac-chinesetrad"), # MAC Traditional Chinese (Big5); Chinese Traditional (Mac)
(10003, "x-mac-korean"), # Korean (Mac)
(10004, "x-mac-arabic"), # Arabic (Mac)
(10005, "x-mac-hebrew"), # Hebrew (Mac)
(10006, "x-mac-greek"), # Greek (Mac)
(10007, "x-mac-cyrillic"), # Cyrillic (Mac)
(10008, "x-mac-chinesesimp"), # MAC Simplified Chinese (GB 2312); Chinese Simplified (Mac)
(10010, "x-mac-romanian"), # Romanian (Mac)
(10017, "x-mac-ukrainian"), # Ukrainian (Mac)
(10021, "x-mac-thai"), # Thai (Mac)
(10029, "x-mac-ce"), # MAC Latin 2; Central European (Mac)
(10079, "x-mac-icelandic"), # Icelandic (Mac)
(10081, "x-mac-turkish"), # Turkish (Mac)
(10082, "x-mac-croatian"), # Croatian (Mac)
(12000, "utf-32"), # Unicode UTF-32, little endian byte order; available only to managed applications
(12001, "utf-32BE"), # Unicode UTF-32, big endian byte order; available only to managed applications
(20000, "x-Chinese_CNS"), # CNS Taiwan; Chinese Traditional (CNS)
(20001, "x-cp20001"), # TCA Taiwan
(20002, "x_Chinese-Eten"), # Eten Taiwan; Chinese Traditional (Eten)
(20003, "x-cp20003"), # IBM5550 Taiwan
(20004, "x-cp20004"), # TeleText Taiwan
(20005, "x-cp20005"), # Wang Taiwan
(20105, "x-IA5"), # IA5 (IRV International Alphabet No. 5, 7-bit); Western European (IA5)
(20106, "x-IA5-German"), # IA5 German (7-bit)
(20107, "x-IA5-Swedish"), # IA5 Swedish (7-bit)
(20108, "x-IA5-Norwegian"), # IA5 Norwegian (7-bit)
(20127, "us-ascii"), # US-ASCII (7-bit)
(20261, "x-cp20261"), # T.61
(20269, "x-cp20269"), # ISO 6937 Non-Spacing Accent
(20273, "IBM273"), # IBM EBCDIC Germany
(20277, "IBM277"), # IBM EBCDIC Denmark-Norway
(20278, "IBM278"), # IBM EBCDIC Finland-Sweden
(20280, "IBM280"), # IBM EBCDIC Italy
(20284, "IBM284"), # IBM EBCDIC Latin America-Spain
(20285, "IBM285"), # IBM EBCDIC United Kingdom
(20290, "IBM290"), # IBM EBCDIC Japanese Katakana Extended
(20297, "IBM297"), # IBM EBCDIC France
(20420, "IBM420"), # IBM EBCDIC Arabic
(20423, "IBM423"), # IBM EBCDIC Greek
(20424, "IBM424"), # IBM EBCDIC Hebrew
(20833, "x-EBCDIC-KoreanExtended"), # IBM EBCDIC Korean Extended
(20838, "IBM-Thai"), # IBM EBCDIC Thai
(20866, "koi8-r"), # Russian (KOI8-R); Cyrillic (KOI8-R)
(20871, "IBM871"), # IBM EBCDIC Icelandic
(20880, "IBM880"), # IBM EBCDIC Cyrillic Russian
(20905, "IBM905"), # IBM EBCDIC Turkish
(20924, "IBM00924"), # IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
(20932, "EUC-JP"), # Japanese (JIS 0208-1990 and 0121-1990)
(20936, "x-cp20936"), # Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
(20949, "x-cp20949"), # Korean Wansung
(21025, "cp1025"), # IBM EBCDIC Cyrillic Serbian-Bulgarian
(21027, ""), # (deprecated)
(21866, "koi8-u"), # Ukrainian (KOI8-U); Cyrillic (KOI8-U)
(28591, "iso-8859-1"), # ISO 8859-1 Latin 1; Western European (ISO)
(28592, "iso-8859-2"), # ISO 8859-2 Central European; Central European (ISO)
(28593, "iso-8859-3"), # ISO 8859-3 Latin 3
(28594, "iso-8859-4"), # ISO 8859-4 Baltic
(28595, "iso-8859-5"), # ISO 8859-5 Cyrillic
(28596, "iso-8859-6"), # ISO 8859-6 Arabic
(28597, "iso-8859-7"), # ISO 8859-7 Greek
(28598, "iso-8859-8"), # ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
(28599, "iso-8859-9"), # ISO 8859-9 Turkish
(28603, "iso-8859-13"), # ISO 8859-13 Estonian
(28605, "iso-8859-15"), # ISO 8859-15 Latin 9
(29001, "x-Europa"), # Europa 3
(38598, "iso-8859-8-i"), # ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
(50220, "iso-2022-jp"), # ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
(50221, "csISO2022JP"), # ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
(50222, "iso-2022-jp"), # ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
(50225, "iso-2022-kr"), # ISO 2022 Korean
(50227, "x-cp50227"), # ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
(50229, ""), # ISO 2022 Traditional Chinese
(50930, ""), # EBCDIC Japanese (Katakana) Extended
(50931, ""), # EBCDIC US-Canada and Japanese
(50933, ""), # EBCDIC Korean Extended and Korean
(50935, ""), # EBCDIC Simplified Chinese Extended and Simplified Chinese
(50936, ""), # EBCDIC Simplified Chinese
(50937, ""), # EBCDIC US-Canada and Traditional Chinese
(50939, ""), # EBCDIC Japanese (Latin) Extended and Japanese
(51932, "euc-jp"), # EUC Japanese
(51936, "EUC-CN"), # EUC Simplified Chinese; Chinese Simplified (EUC)
(51949, "euc-kr"), # EUC Korean
(51950, ""), # EUC Traditional Chinese
(52936, "hz-gb-2312"), # HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
(54936, "GB18030"), # Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
(57002, "x-iscii-de"), # ISCII Devanagari
(57003, "x-iscii-be"), # ISCII Bengali
(57004, "x-iscii-ta"), # ISCII Tamil
(57005, "x-iscii-te"), # ISCII Telugu
(57006, "x-iscii-as"), # ISCII Assamese
(57007, "x-iscii-or"), # ISCII Oriya
(57008, "x-iscii-ka"), # ISCII Kannada
(57009, "x-iscii-ma"), # ISCII Malayalam
(57010, "x-iscii-gu"), # ISCII Gujarati
(57011, "x-iscii-pa"), # ISCII Punjabi
(65000, "utf-7"), # Unicode (UTF-7)
(65001, "utf-8")] # Unicode (UTF-8)
(20838, "IBM-Thai"), # IBM EBCDIC Thai
(20866, "koi8-r"), # Russian (KOI8-R); Cyrillic (KOI8-R)
(20871, "IBM871"), # IBM EBCDIC Icelandic
(20880, "IBM880"), # IBM EBCDIC Cyrillic Russian
(20905, "IBM905"), # IBM EBCDIC Turkish
(20924, "IBM00924"), # IBM EBCDIC Latin 1/Open System (1047 + Euro symbol)
(20932, "EUC-JP"), # Japanese (JIS 0208-1990 and 0121-1990)
(20936, "x-cp20936"), # Simplified Chinese (GB2312); Chinese Simplified (GB2312-80)
(20949, "x-cp20949"), # Korean Wansung
(21025, "cp1025"), # IBM EBCDIC Cyrillic Serbian-Bulgarian
(21027, ""), # (deprecated)
(21866, "koi8-u"), # Ukrainian (KOI8-U); Cyrillic (KOI8-U)
(28591, "iso-8859-1"), # ISO 8859-1 Latin 1; Western European (ISO)
(28592, "iso-8859-2"), # ISO 8859-2 Central European; Central European (ISO)
(28593, "iso-8859-3"), # ISO 8859-3 Latin 3
(28594, "iso-8859-4"), # ISO 8859-4 Baltic
(28595, "iso-8859-5"), # ISO 8859-5 Cyrillic
(28596, "iso-8859-6"), # ISO 8859-6 Arabic
(28597, "iso-8859-7"), # ISO 8859-7 Greek
(28598, "iso-8859-8"), # ISO 8859-8 Hebrew; Hebrew (ISO-Visual)
(28599, "iso-8859-9"), # ISO 8859-9 Turkish
(28603, "iso-8859-13"), # ISO 8859-13 Estonian
(28605, "iso-8859-15"), # ISO 8859-15 Latin 9
(29001, "x-Europa"), # Europa 3
(38598, "iso-8859-8-i"), # ISO 8859-8 Hebrew; Hebrew (ISO-Logical)
(50220, "iso-2022-jp"), # ISO 2022 Japanese with no halfwidth Katakana; Japanese (JIS)
(50221, "csISO2022JP"), # ISO 2022 Japanese with halfwidth Katakana; Japanese (JIS-Allow 1 byte Kana)
(50222, "iso-2022-jp"), # ISO 2022 Japanese JIS X 0201-1989; Japanese (JIS-Allow 1 byte Kana - SO/SI)
(50225, "iso-2022-kr"), # ISO 2022 Korean
(50227, "x-cp50227"), # ISO 2022 Simplified Chinese; Chinese Simplified (ISO 2022)
(50229, ""), # ISO 2022 Traditional Chinese
(50930, ""), # EBCDIC Japanese (Katakana) Extended
(50931, ""), # EBCDIC US-Canada and Japanese
(50933, ""), # EBCDIC Korean Extended and Korean
(50935, ""), # EBCDIC Simplified Chinese Extended and Simplified Chinese
(50936, ""), # EBCDIC Simplified Chinese
(50937, ""), # EBCDIC US-Canada and Traditional Chinese
(50939, ""), # EBCDIC Japanese (Latin) Extended and Japanese
(51932, "euc-jp"), # EUC Japanese
(51936, "EUC-CN"), # EUC Simplified Chinese; Chinese Simplified (EUC)
(51949, "euc-kr"), # EUC Korean
(51950, ""), # EUC Traditional Chinese
(52936, "hz-gb-2312"), # HZ-GB2312 Simplified Chinese; Chinese Simplified (HZ)
(54936, "GB18030"), # Windows XP and later: GB18030 Simplified Chinese (4 byte); Chinese Simplified (GB18030)
(57002, "x-iscii-de"), # ISCII Devanagari
(57003, "x-iscii-be"), # ISCII Bengali
(57004, "x-iscii-ta"), # ISCII Tamil
(57005, "x-iscii-te"), # ISCII Telugu
(57006, "x-iscii-as"), # ISCII Assamese
(57007, "x-iscii-or"), # ISCII Oriya
(57008, "x-iscii-ka"), # ISCII Kannada
(57009, "x-iscii-ma"), # ISCII Malayalam
(57010, "x-iscii-gu"), # ISCII Gujarati
(57011, "x-iscii-pa"), # ISCII Punjabi
(65000, "utf-7"), # Unicode (UTF-7)
(65001, "utf-8")] # Unicode (UTF-8)
when false:
# not needed yet:
@@ -231,7 +233,8 @@ when defined(windows):
result = ""
proc getACP(): CodePage {.stdcall, importc: "GetACP", dynlib: "kernel32".}
proc getGetConsoleCP(): CodePage {.stdcall, importc: "GetConsoleCP", dynlib: "kernel32".}
proc getGetConsoleCP(): CodePage {.stdcall, importc: "GetConsoleCP",
dynlib: "kernel32".}
proc multiByteToWideChar(
codePage: CodePage,
@@ -249,8 +252,8 @@ when defined(windows):
cchWideChar: cint,
lpMultiByteStr: cstring,
cbMultiByte: cint,
lpDefaultChar: cstring=nil,
lpUsedDefaultChar: pointer=nil): cint {.
lpDefaultChar: cstring = nil,
lpUsedDefaultChar: pointer = nil): cint {.
stdcall, importc: "WideCharToMultiByte", dynlib: "kernel32".}
else:
@@ -394,7 +397,8 @@ when defined(windows):
else:
assert(false) # cannot happen
proc convertWin(codePageFrom: CodePage, codePageTo: CodePage, s: string): string =
proc convertWin(codePageFrom: CodePage, codePageTo: CodePage,
s: string): string =
# special case: empty string: needed because MultiByteToWideChar, WideCharToMultiByte
# return 0 in case of error
if s.len == 0: return ""
@@ -410,8 +414,10 @@ when defined(windows):
raise newException(EncodingError, message)
# in case it's already UTF-16 little endian - conversion can be simplified
let wideString = if int(codePageFrom) == 1200: s else: convertToWideString(codePageFrom, s)
return if int(codePageTo) == 1200: wideString else: convertFromWideString(codePageTo, wideString)
let wideString = if int(codePageFrom) == 1200: s
else: convertToWideString(codePageFrom, s)
return if int(codePageTo) == 1200: wideString
else: convertFromWideString(codePageTo, wideString)
proc convert*(c: EncodingConverter, s: string): string =
## converts `s` to `destEncoding` that was given to the converter `c`. It
@@ -540,4 +546,4 @@ when not defined(testing) and isMainModule and defined(windows):
result = convert(original, "utf-8", "utf-16")
doAssert(result == "")
result = convert(original, "windows-1251", "koi8-r")
doAssert(result == "")
doAssert(result == "")

View File

@@ -118,7 +118,7 @@ proc encode*(s: string): string {.raises: [PunyError].} =
## Encode a string that may contain Unicode. Prefix is empty.
result = encode("", s)
proc decode*(encoded: string): string {.raises: [PunyError].} =
proc decode*(encoded: string): string {.raises: [PunyError].} =
## Decode a Punycode-encoded string
var
n = InitialN

View File

@@ -56,7 +56,8 @@ when defined(windows):
wProcessorLevel: uint16
wProcessorRevision: uint16
proc getSystemInfo(lpSystemInfo: ptr SYSTEM_INFO) {.stdcall, dynlib: "kernel32", importc: "GetSystemInfo".}
proc getSystemInfo(lpSystemInfo: ptr SYSTEM_INFO) {.stdcall,
dynlib: "kernel32", importc: "GetSystemInfo".}
proc getAllocationGranularity: uint =
var sysInfo: SYSTEM_INFO
@@ -66,12 +67,12 @@ when defined(windows):
let allocationGranularity = getAllocationGranularity().int
const
memNoAccess = MemAccessFlags(PAGE_NOACCESS)
memExec* = MemAccessFlags(PAGE_EXECUTE)
memExecRead* = MemAccessFlags(PAGE_EXECUTE_READ)
memNoAccess = MemAccessFlags(PAGE_NOACCESS)
memExec* = MemAccessFlags(PAGE_EXECUTE)
memExecRead* = MemAccessFlags(PAGE_EXECUTE_READ)
memExecReadWrite* = MemAccessFlags(PAGE_EXECUTE_READWRITE)
memRead* = MemAccessFlags(PAGE_READONLY)
memReadWrite* = MemAccessFlags(PAGE_READWRITE)
memRead* = MemAccessFlags(PAGE_READONLY)
memReadWrite* = MemAccessFlags(PAGE_READWRITE)
template check(expr) =
let r = expr
@@ -84,12 +85,12 @@ else:
let allocationGranularity = sysconf(SC_PAGESIZE)
let
memNoAccess = MemAccessFlags(PROT_NONE)
memExec* = MemAccessFlags(PROT_EXEC)
memExecRead* = MemAccessFlags(PROT_EXEC or PROT_READ)
memNoAccess = MemAccessFlags(PROT_NONE)
memExec* = MemAccessFlags(PROT_EXEC)
memExecRead* = MemAccessFlags(PROT_EXEC or PROT_READ)
memExecReadWrite* = MemAccessFlags(PROT_EXEC or PROT_READ or PROT_WRITE)
memRead* = MemAccessFlags(PROT_READ)
memReadWrite* = MemAccessFlags(PROT_READ or PROT_WRITE)
memRead* = MemAccessFlags(PROT_READ)
memReadWrite* = MemAccessFlags(PROT_READ or PROT_WRITE)
template check(expr) =
if not expr:
@@ -125,14 +126,16 @@ proc init*(T: type ReservedMem,
let commitSize = nextAlignedOffset(initCommitLen, allocationGranularity)
when defined(windows):
result.memStart = virtualAlloc(memStart, maxLen, MEM_RESERVE, accessFlags.cint)
result.memStart = virtualAlloc(memStart, maxLen, MEM_RESERVE,
accessFlags.cint)
check result.memStart
if commitSize > 0:
check virtualAlloc(result.memStart, commitSize, MEM_COMMIT, accessFlags.cint)
check virtualAlloc(result.memStart, commitSize, MEM_COMMIT,
accessFlags.cint)
else:
var allocFlags = MAP_PRIVATE or MAP_ANONYMOUS # or MAP_NORESERVE
# if memStart != nil:
# allocFlags = allocFlags or MAP_FIXED_NOREPLACE
# if memStart != nil:
# allocFlags = allocFlags or MAP_FIXED_NOREPLACE
result.memStart = mmap(memStart, maxLen, PROT_NONE, allocFlags, -1, 0)
check result.memStart != MAP_FAILED
if commitSize > 0:
@@ -164,7 +167,8 @@ proc setLen*(m: var ReservedMem, newLen: int) =
check virtualAlloc(m.committedMemEnd, commitExtensionSize,
MEM_COMMIT, m.accessFlags.cint)
else:
check mprotect(m.committedMemEnd, commitExtensionSize, m.accessFlags.cint) == 0
check mprotect(m.committedMemEnd, commitExtensionSize,
m.accessFlags.cint) == 0
else:
let d = distance(m.usedMemEnd, m.committedMemEnd) -
m.maxCommittedAndUnusedPages * allocationGranularity

View File

@@ -19,9 +19,9 @@
include "system/inclrtl"
import streams
{.deadCodeElim: on.} # dce option deprecated
{.deadCodeElim: on.} # dce option deprecated
{.push debugger:off .} # the user does not want to trace a part
{.push debugger: off.} # the user does not want to trace a part
# of the standard library!
const
@@ -57,8 +57,8 @@ proc newRope(data: string): Rope =
result.data = data
var
cache {.threadvar.}: Rope # the root of the cache tree
N {.threadvar.}: Rope # dummy rope needed for splay algorithm
cache {.threadvar.}: Rope # the root of the cache tree
N {.threadvar.}: Rope # dummy rope needed for splay algorithm
when countCacheMisses:
var misses, hits: int
@@ -67,7 +67,7 @@ proc splay(s: string, tree: Rope, cmpres: var int): Rope =
var c: int
var t = tree
N.left = nil
N.right = nil # reset to nil
N.right = nil # reset to nil
var le = N
var r = N
while true:
@@ -235,7 +235,7 @@ proc write*(s: Stream, r: Rope) {.rtl, extern: "nroWriteStream".} =
## writes a rope to a stream.
for rs in leaves(r): write(s, rs)
proc `$`*(r: Rope): string {.rtl, extern: "nroToString".}=
proc `$`*(r: Rope): string {.rtl, extern: "nroToString".} =
## converts a rope back to a string.
result = newStringOfCap(r.len)
for s in leaves(r): add(result, s)
@@ -290,7 +290,7 @@ proc addf*(c: var Rope, frmt: string, args: openArray[Rope]) {.
add(c, frmt % args)
const
bufSize = 1024 # 1 KB is reasonable
bufSize = 1024 # 1 KB is reasonable
proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} =
## returns true if the contents of the file `f` equal `r`.
@@ -307,7 +307,7 @@ proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} =
# Read more data
bpos = 0
blen = readBuffer(f, addr(buf[0]), buf.len)
if blen == 0: # no more data in file
if blen == 0: # no more data in file
result = false
return
let n = min(blen - bpos, slen - spos)
@@ -319,7 +319,7 @@ proc equalsFile*(r: Rope, f: File): bool {.rtl, extern: "nro$1File".} =
spos += n
bpos += n
result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all
result = readBuffer(f, addr(buf[0]), 1) == 0 # check that we've read all
proc equalsFile*(r: Rope, filename: string): bool {.rtl, extern: "nro$1Str".} =
## returns true if the contents of the file `f` equal `r`. If `f` does not

View File

@@ -897,7 +897,7 @@ proc readLine*(s: Stream, line: var TaintedString): bool =
doAssert strm.readLine(line) == false
doAssert line == ""
strm.close()
if s.readLineImpl != nil:
result = s.readLineImpl(s, line)
else:
@@ -1259,7 +1259,8 @@ else:
result.writeDataImpl = fsWriteData
result.flushImpl = fsFlush
proc newFileStream*(filename: string, mode: FileMode = fmRead, bufSize: int = -1): owned FileStream =
proc newFileStream*(filename: string, mode: FileMode = fmRead,
bufSize: int = -1): owned FileStream =
## Creates a new stream from the file named `filename` with the mode `mode`.
##
## If the file cannot be opened, `nil` is returned. See the `io module
@@ -1296,7 +1297,8 @@ else:
var f: File
if open(f, filename, mode, bufSize): result = newFileStream(f)
proc openFileStream*(filename: string, mode: FileMode = fmRead, bufSize: int = -1): owned FileStream =
proc openFileStream*(filename: string, mode: FileMode = fmRead,
bufSize: int = -1): owned FileStream =
## Creates a new stream from the file named `filename` with the mode `mode`.
## If the file cannot be opened, an IO exception is raised.
##
@@ -1390,11 +1392,11 @@ when false:
else:
var flags: cint
case mode
of fmRead: flags = posix.O_RDONLY
of fmWrite: flags = O_WRONLY or int(O_CREAT)
of fmReadWrite: flags = O_RDWR or int(O_CREAT)
of fmRead: flags = posix.O_RDONLY
of fmWrite: flags = O_WRONLY or int(O_CREAT)
of fmReadWrite: flags = O_RDWR or int(O_CREAT)
of fmReadWriteExisting: flags = O_RDWR
of fmAppend: flags = O_WRONLY or int(O_CREAT) or O_APPEND
of fmAppend: flags = O_WRONLY or int(O_CREAT) or O_APPEND
var handle = open(filename, flags)
if handle < 0: raise newEOS("posix.open() call failed")
result = newFileHandleStream(handle)

View File

@@ -273,7 +273,8 @@ proc mkDigit(v: int, typ: char): string {.inline.} =
else:
result = $chr(ord(if typ == 'x': 'a' else: 'A') + v - 10)
proc alignString*(s: string, minimumWidth: int; align = '\0'; fill = ' '): string =
proc alignString*(s: string, minimumWidth: int; align = '\0';
fill = ' '): string =
## Aligns ``s`` using ``fill`` char.
## This is only of interest if you want to write a custom ``format`` proc that
## should support the standard format specifiers.
@@ -294,17 +295,18 @@ proc alignString*(s: string, minimumWidth: int; align = '\0'; fill = ' '): strin
type
StandardFormatSpecifier* = object ## Type that describes "standard format specifiers".
fill*, align*: char ## Desired fill and alignment.
sign*: char ## Desired sign.
alternateForm*: bool ## Whether to prefix binary, octal and hex numbers
## with ``0b``, ``0o``, ``0x``.
padWithZero*: bool ## Whether to pad with zeros rather than spaces.
minimumWidth*, precision*: int ## Desired minimum width and precision.
typ*: char ## Type like 'f', 'g' or 'd'.
endPosition*: int ## End position in the format specifier after
## ``parseStandardFormatSpecifier`` returned.
fill*, align*: char ## Desired fill and alignment.
sign*: char ## Desired sign.
alternateForm*: bool ## Whether to prefix binary, octal and hex numbers
## with ``0b``, ``0o``, ``0x``.
padWithZero*: bool ## Whether to pad with zeros rather than spaces.
minimumWidth*, precision*: int ## Desired minimum width and precision.
typ*: char ## Type like 'f', 'g' or 'd'.
endPosition*: int ## End position in the format specifier after
## ``parseStandardFormatSpecifier`` returned.
proc formatInt(n: SomeNumber; radix: int; spec: StandardFormatSpecifier): string =
proc formatInt(n: SomeNumber; radix: int;
spec: StandardFormatSpecifier): string =
## Converts ``n`` to string. If ``n`` is `SomeFloat`, it casts to `int64`.
## Conversion is done using ``radix``. If result's length is lesser than
## ``minimumWidth``, it aligns result to the right or left (depending on ``a``)
@@ -415,7 +417,8 @@ proc parseStandardFormatSpecifier*(s: string; start = 0;
raise newException(ValueError,
"invalid format string, cannot parse: " & s[i..^1])
proc formatValue*[T: SomeInteger](result: var string; value: T; specifier: string) =
proc formatValue*[T: SomeInteger](result: var string; value: T;
specifier: string) =
## Standard format implementation for ``SomeInteger``. It makes little
## sense to call this directly, but it is required to exist
## by the ``&`` macro.
@@ -509,7 +512,8 @@ proc formatValue*(result: var string; value: string; specifier: string) =
setLen(value, runeOffset(value, spec.precision))
result.add alignString(value, spec.minimumWidth, spec.align, spec.fill)
proc formatValue[T: not SomeInteger](result: var string; value: T; specifier: string) =
proc formatValue[T: not SomeInteger](result: var string; value: T;
specifier: string) =
mixin `$`
formatValue(result, $value, specifier)
@@ -526,12 +530,13 @@ macro `&`*(pattern: string): untyped =
let f = pattern.strVal
var i = 0
let res = genSym(nskVar, "fmtRes")
result = newNimNode(nnkStmtListExpr, lineInfoFrom=pattern)
result = newNimNode(nnkStmtListExpr, lineInfoFrom = pattern)
# XXX: https://github.com/nim-lang/Nim/issues/8405
# When compiling with -d:useNimRtl, certain procs such as `count` from the strutils
# module are not accessible at compile-time:
let expectedGrowth = when defined(useNimRtl): 0 else: count(f, '{') * 10
result.add newVarStmt(res, newCall(bindSym"newStringOfCap", newLit(f.len + expectedGrowth)))
result.add newVarStmt(res, newCall(bindSym"newStringOfCap",
newLit(f.len + expectedGrowth)))
var strlit = ""
while i < f.len:
if f[i] == '{':

View File

@@ -12,7 +12,7 @@
import strutils
{.deadCodeElim: on.} # dce option deprecated
{.deadCodeElim: on.} # dce option deprecated
proc expandTabs*(s: string, tabSize: int = 8): string {.noSideEffect,
procvar.} =

View File

@@ -325,7 +325,8 @@ macro scanf*(input: string; pattern: static[string]; results: varargs[typed]): b
var idx = genSym(nskVar, "idx")
var res = genSym(nskVar, "res")
let inp = genSym(nskLet, "inp")
result = newTree(nnkStmtListExpr, newLetStmt(inp, input), newVarStmt(idx, newLit 0), newVarStmt(res, newLit false))
result = newTree(nnkStmtListExpr, newLetStmt(inp, input),
newVarStmt(idx, newLit 0), newVarStmt(res, newLit false))
var conds = newTree(nnkStmtList)
var fullMatch = false
while p < pattern.len:
@@ -334,7 +335,8 @@ macro scanf*(input: string; pattern: static[string]; results: varargs[typed]): b
case pattern[p]
of '$':
var resLen = genSym(nskLet, "resLen")
conds.add newLetStmt(resLen, newCall(bindSym"skip", inp, newLit($pattern[p]), idx))
conds.add newLetStmt(resLen, newCall(bindSym"skip", inp,
newLit($pattern[p]), idx))
conds.add resLen.notZero
conds.add resLen
of 'w':
@@ -374,7 +376,8 @@ macro scanf*(input: string; pattern: static[string]; results: varargs[typed]): b
matchError
inc i
of 's':
conds.add newCall(bindSym"inc", idx, newCall(bindSym"skipWhitespace", inp, idx))
conds.add newCall(bindSym"inc", idx,
newCall(bindSym"skipWhitespace", inp, idx))
conds.add newEmptyNode()
conds.add newEmptyNode()
of '.':
@@ -385,13 +388,14 @@ macro scanf*(input: string; pattern: static[string]; results: varargs[typed]): b
of '*', '+':
if i < results.len and getType(results[i]).typeKind == ntyString:
var min = ord(pattern[p] == '+')
var q=p+1
var q = p+1
var token = ""
while q < pattern.len and pattern[q] != '$':
token.add pattern[q]
inc q
var resLen = genSym(nskLet, "resLen")
conds.add newLetStmt(resLen, newCall(bindSym"parseUntil", inp, results[i], newLit(token), idx))
conds.add newLetStmt(resLen, newCall(bindSym"parseUntil", inp,
results[i], newLit(token), idx))
conds.add newCall(bindSym"!=", resLen, newLit min)
conds.add resLen
else:
@@ -563,8 +567,8 @@ macro scanp*(input, idx: typed; pattern: varargs[untyped]): bool =
if a.cond.kind == nnkEmpty or b.cond.kind == nnkEmpty:
error("'|' operator applied to a non-condition")
else:
result = (newStmtList(a.init,
newIfStmt((a.cond, a.action), (newTree(nnkStmtListExpr, b.init, b.cond), b.action))),
result = (newStmtList(a.init, newIfStmt((a.cond, a.action),
(newTree(nnkStmtListExpr, b.init, b.cond), b.action))),
newEmptyNode(), newEmptyNode())
elif it.kind == nnkInfix and it[0].eqIdent"^*":
# a ^* b is rewritten to: (a *(b a))?
@@ -582,18 +586,22 @@ macro scanp*(input, idx: typed; pattern: varargs[untyped]): bool =
else:
var resLen = genSym(nskLet, "resLen")
result = (newLetStmt(resLen, placeholder(it, input, idx)),
newCall(interf"success", resLen), !!newCall(interf"nxt", input, idx, resLen))
newCall(interf"success", resLen),
!!newCall(interf"nxt", input, idx, resLen))
of nnkStrLit..nnkTripleStrLit:
var resLen = genSym(nskLet, "resLen")
result = (newLetStmt(resLen, newCall(interf"skip", input, it, idx)),
newCall(interf"success", resLen), !!newCall(interf"nxt", input, idx, resLen))
newCall(interf"success", resLen),
!!newCall(interf"nxt", input, idx, resLen))
of nnkCurly, nnkAccQuoted, nnkCharLit:
result = (newEmptyNode(), newCall(interf"atom", input, idx, it), !!newCall(interf"nxt", input, idx))
result = (newEmptyNode(), newCall(interf"atom", input, idx, it),
!!newCall(interf"nxt", input, idx))
of nnkCurlyExpr:
if it.len == 3 and it[1].kind == nnkIntLit and it[2].kind == nnkIntLit:
var h = newTree(nnkTupleConstr, it[0])
for count in 2i64 .. it[1].intVal: h.add(it[0])
for count in it[1].intVal .. it[2].intVal-1: h.add(newTree(nnkPrefix, ident"?", it[0]))
for count in it[1].intVal .. it[2].intVal-1:
h.add(newTree(nnkPrefix, ident"?", it[0]))
result = atm(h, input, idx, attached)
elif it.len == 2 and it[1].kind == nnkIntLit:
var h = newTree(nnkTupleConstr, it[0])
@@ -617,7 +625,7 @@ macro scanp*(input, idx: typed; pattern: varargs[untyped]): bool =
#var idx = genSym(nskVar, "idx")
var res = genSym(nskVar, "res")
result = newTree(nnkStmtListExpr, #newVarStmt(idx, newCall(interf"prepare", input)),
newVarStmt(res, newLit false))
newVarStmt(res, newLit false))
var conds: seq[StmtTriple] = @[]
for it in pattern:
conds.add atm(it, input, idx, nil)
@@ -635,7 +643,7 @@ when isMainModule:
else:
result = 0
# Test helper: counts how many consecutive characters of `input`, beginning at
# index `start`, are members of `seps` and returns that count (0 when the
# character at `start` is not a separator or `start` is past the end).
# `seps` defaults to the set {';', ',', '-', '.'}.
# NOTE(review): the signature appears twice because this text is a rendered
# diff - the first header is the old spacing, the second the new spacing.
proc someSep(input: string; start: int; seps: set[char] = {';',',','-','.'}): int =
proc someSep(input: string; start: int; seps: set[char] = {';', ',', '-', '.'}): int =
result = 0
# The length check comes first so `input[start+result]` is never read out of range.
while start+result < input.len and input[start+result] in seps: inc result
@@ -660,7 +668,7 @@ when isMainModule:
var info = ""
if scanp(resp, idx, *`whites`, '#', *`digits`, +`whites`, ?("0x", *`hexdigits`, " in "),
demangle($input, prc, $index), *`whites`, '(', * ~ ')', ')',
*`whites`, "at ", +(~{'\C', '\L'} -> info.add($_)) ):
*`whites`, "at ", +(~{'\C', '\L'} -> info.add($_))):
result.add prc & " " & info
else:
break
@@ -689,21 +697,25 @@ when isMainModule:
let xx2 = scanf("$1234", "$$$i", intval)
doAssert xx2
let yy = scanf(";.--Breakpoint00 [output]", "$[someSep]Breakpoint${twoDigits}$[someSep({';','.','-'})] [$+]$.", intVal, key)
let yy = scanf(";.--Breakpoint00 [output]",
"$[someSep]Breakpoint${twoDigits}$[someSep({';','.','-'})] [$+]$.",
intVal, key)
doAssert yy
doAssert key == "output"
doAssert intVal == 13
var ident = ""
var idx = 0
let zz = scanp("foobar x x x xWZ", idx, +{'a'..'z'} -> add(ident, $_), *(*{' ', '\t'}, "x"), ~'U', "Z")
let zz = scanp("foobar x x x xWZ", idx, +{'a'..'z'} -> add(ident, $_), *(*{
' ', '\t'}, "x"), ~'U', "Z")
doAssert zz
doAssert ident == "foobar"
const digits = {'0'..'9'}
var year = 0
var idx2 = 0
if scanp("201655-8-9", idx2, `digits`{4,6} -> (year = year * 10 + ord($_) - ord('0')), "-8", "-9"):
if scanp("201655-8-9", idx2, `digits`{4, 6} -> (year = year * 10 + ord($_) -
ord('0')), "-8", "-9"):
doAssert year == 201655
const gdbOut = """

View File

@@ -60,10 +60,10 @@ else:
include "system/inclrtl"
type
# NOTE(review): the enum is listed twice because this text is a rendered diff
# (old alignment first, new alignment second); both copies carry the same text.
StringTableMode* = enum ## Describes the table's operation mode.
modeCaseSensitive, ## the table is case-sensitive
modeCaseInsensitive, ## the table is case-insensitive
modeStyleInsensitive ## the table is style-insensitive
StringTableMode* = enum ## Describes the table's operation mode.
modeCaseSensitive, ## the table is case-sensitive
modeCaseInsensitive, ## the table is case-insensitive
modeStyleInsensitive ## the table is style-insensitive
KeyValuePair = tuple[key, val: string, hasValue: bool]
KeyValuePairSeq = seq[KeyValuePair]
StringTableObj* = object of RootObj
@@ -73,15 +73,15 @@ type
StringTableRef* = ref StringTableObj
# NOTE(review): the enum is listed twice because this text is a rendered diff
# (old alignment first, new alignment second); both copies carry the same text.
FormatFlag* = enum ## Flags for the `%` operator.
useEnvironment, ## Use the environment variable if the ``$key``
## is not found in the table.
## Has no effect when using the `js` target.
useEmpty, ## Use the empty string as a default, thus it
## won't throw an exception if ``$key`` is not
## in the table.
useKey ## Do not replace ``$key`` if it is not found
## in the table (or in the environment).
FormatFlag* = enum ## Flags for the `%` operator.
useEnvironment, ## Use the environment variable if the ``$key``
## is not found in the table.
## Has no effect when using the `js` target.
useEmpty, ## Use the empty string as a default, thus it
## won't throw an exception if ``$key`` is not
## in the table.
useKey ## Do not replace ``$key`` if it is not found
## in the table (or in the environment).
const
growthFactor = 2
@@ -168,7 +168,8 @@ proc `[]`*(t: StringTableRef, key: string): var string {.
echo t["occupation"]
get(t, key)
proc getOrDefault*(t: StringTableRef; key: string, default: string = ""): string =
proc getOrDefault*(t: StringTableRef; key: string,
default: string = ""): string =
## Retrieves the location at ``t[key]``.
##
## If `key` is not in `t`, the default value is returned (if not specified,
@@ -191,7 +192,8 @@ proc getOrDefault*(t: StringTableRef; key: string, default: string = ""): string
if index >= 0: result = t.data[index].val
else: result = default
proc hasKey*(t: StringTableRef, key: string): bool {.rtlFunc, extern: "nst$1".} =
proc hasKey*(t: StringTableRef, key: string): bool {.rtlFunc,
extern: "nst$1".} =
## Returns true if `key` is in the table `t`.
##
## See also:
@@ -276,8 +278,8 @@ proc newStringTable*(keyValuePairs: varargs[string],
inc(i, 2)
proc newStringTable*(keyValuePairs: varargs[tuple[key, val: string]],
mode: StringTableMode = modeCaseSensitive): owned(StringTableRef) {.
rtlFunc, extern: "nst$1WithTableConstr".} =
mode: StringTableMode = modeCaseSensitive): owned(StringTableRef) {.
rtlFunc, extern: "nst$1WithTableConstr".} =
## Creates a new string table with given `(key, value)` tuple pairs.
##
## The default mode is case sensitive.
@@ -343,17 +345,17 @@ proc del*(t: StringTableRef, key: string) =
if i >= 0:
dec(t.counter)
block outer:
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
t.data[i].hasValue = false # mark current EMPTY
while true: # KnuthV3 Algo6.4R adapted for i=i+1 instead of i=i-1
var j = i # The correctness of this depends on (h+1) in nextTry,
var r = j # though may be adaptable to other simple sequences.
t.data[i].hasValue = false # mark current EMPTY
t.data[i].key = ""
t.data[i].val = ""
while true:
i = (i + 1) and msk # increment mod table size
if not t.data[i].hasValue: # end of collision cluster; So all done
i = (i + 1) and msk # increment mod table size
if not t.data[i].hasValue: # end of collision cluster; So all done
break outer
r = t.myhash(t.data[i].key) and msk # "home" location of key@i
r = t.myhash(t.data[i].key) and msk # "home" location of key@i
if not ((i >= r and r > j) or (r > j and j > i) or (j > i and i >= r)):
break
when defined(js):

View File

@@ -80,9 +80,9 @@ when defined(nimVmExportFixed):
from unicode import toLower, toUpper
export toLower, toUpper
{.deadCodeElim: on.} # dce option deprecated
{.deadCodeElim: on.} # dce option deprecated
{.push debugger:off .} # the user does not want to trace a part
{.push debugger: off.} # the user does not want to trace a part
# of the standard library!
include "system/inclrtl"
@@ -132,7 +132,7 @@ const
## doAssert "01A34".find(invalid) == 2
proc isAlphaAscii*(c: char): bool {.noSideEffect, procvar,
rtl, extern: "nsuIsAlphaAsciiChar".}=
rtl, extern: "nsuIsAlphaAsciiChar".} =
## Checks whether or not character `c` is alphabetical.
##
## This checks a-z, A-Z ASCII characters only.
@@ -333,8 +333,8 @@ proc cmpIgnoreCase*(a, b: string): int {.noSideEffect,
inc(i)
result = a.len - b.len
{.push checks: off, line_trace: off .} # this is a hot-spot in the compiler!
# thus we compile without checks here
{.push checks: off, line_trace: off.} # this is a hot-spot in the compiler!
# thus we compile without checks here
proc cmpIgnoreStyle*(a, b: string): int {.noSideEffect,
rtl, extern: "nsuCmpIgnoreStyle", procvar.} =
@@ -763,8 +763,9 @@ proc split*(s: string, sep: string, maxsplit: int = -1): seq[string] {.noSideEff
doAssert "a,b,c".split(",") == @["a", "b", "c"]
doAssert "a man a plan a canal panama".split("a ") == @["", "man ", "plan ", "canal panama"]
doAssert "".split("Elon Musk") == @[""]
doAssert "a largely spaced sentence".split(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"]
doAssert "a largely spaced sentence".split(" ", maxsplit=1) == @["a", " largely spaced sentence"]
doAssert "a largely spaced sentence".split(" ") == @["a", "", "largely",
"", "", "", "spaced", "sentence"]
doAssert "a largely spaced sentence".split(" ", maxsplit = 1) == @["a", " largely spaced sentence"]
doAssert(sep.len > 0)
accResult(split(s, sep, maxsplit))
@@ -849,11 +850,14 @@ proc rsplit*(s: string, sep: string, maxsplit: int = -1): seq[string]
## * `splitLines proc<#splitLines,string>`_
## * `splitWhitespace proc<#splitWhitespace,string,int>`_
runnableExamples:
doAssert "a largely spaced sentence".rsplit(" ", maxsplit=1) == @["a largely spaced", "sentence"]
doAssert "a largely spaced sentence".rsplit(" ", maxsplit = 1) == @[
"a largely spaced", "sentence"]
doAssert "a,b,c".rsplit(",") == @["a", "b", "c"]
doAssert "a man a plan a canal panama".rsplit("a ") == @["", "man ", "plan ", "canal panama"]
doAssert "a man a plan a canal panama".rsplit("a ") == @["", "man ",
"plan ", "canal panama"]
doAssert "".rsplit("Elon Musk") == @[""]
doAssert "a largely spaced sentence".rsplit(" ") == @["a", "", "largely", "", "", "", "spaced", "sentence"]
doAssert "a largely spaced sentence".rsplit(" ") == @["a", "",
"largely", "", "", "", "spaced", "sentence"]
accResult(rsplit(s, sep, maxsplit))
result.reverse()
@@ -866,7 +870,7 @@ proc splitLines*(s: string, keepEol = false): seq[string] {.noSideEffect,
## * `splitLines iterator<#splitLines.i,string>`_
## * `splitWhitespace proc<#splitWhitespace,string,int>`_
## * `countLines proc<#countLines,string>`_
accResult(splitLines(s, keepEol=keepEol))
accResult(splitLines(s, keepEol = keepEol))
proc splitWhitespace*(s: string, maxsplit: int = -1): seq[string] {.noSideEffect,
rtl, extern: "nsuSplitWhitespace".} =
@@ -1185,7 +1189,7 @@ proc generateHexCharToValueMap(): string =
of '0'..'9': inp - ord('0')
of 'a'..'f': inp - ord('a') + 10
of 'A'..'F': inp - ord('A') + 10
else: 17 # indicates an invalid hex char
else: 17 # indicates an invalid hex char
result.add chr(o)
const hexCharToValueMap = generateHexCharToValueMap()
@@ -1346,7 +1350,8 @@ proc align*(s: string, count: Natural, padding = ' '): string {.
else:
result = s
proc alignLeft*(s: string, count: Natural, padding = ' '): string {.noSideEffect.} =
proc alignLeft*(s: string, count: Natural, padding = ' '): string {.
noSideEffect.} =
## Left-Aligns a string `s` with `padding`, so that it is of length `count`.
##
## `padding` characters (by default spaces) are added after `s` resulting in
@@ -1603,15 +1608,15 @@ proc removePrefix*(s: var string, chars: set[char] = Newlines) {.
## See also:
## * `removeSuffix proc<#removeSuffix,string,set[char]>`_
runnableExamples:
var userInput = "\r\n*~Hello World!"
userInput.removePrefix
doAssert userInput == "*~Hello World!"
userInput.removePrefix({'~', '*'})
doAssert userInput == "Hello World!"
var userInput = "\r\n*~Hello World!"
userInput.removePrefix
doAssert userInput == "*~Hello World!"
userInput.removePrefix({'~', '*'})
doAssert userInput == "Hello World!"
var otherInput = "?!?Hello!?!"
otherInput.removePrefix({'!', '?'})
doAssert otherInput == "Hello!?!"
var otherInput = "?!?Hello!?!"
otherInput.removePrefix({'!', '?'})
doAssert otherInput == "Hello!?!"
var start = 0
while start < s.len and s[start] in chars: start += 1
@@ -1639,9 +1644,9 @@ proc removePrefix*(s: var string, prefix: string) {.
## * `removeSuffix proc<#removeSuffix,string,string>`_
## * `startsWith proc<#startsWith,string,string>`_
runnableExamples:
var answers = "yesyes"
answers.removePrefix("yes")
doAssert answers == "yes"
var answers = "yesyes"
answers.removePrefix("yes")
doAssert answers == "yes"
if s.startsWith(prefix):
s.delete(0, prefix.len - 1)
@@ -1653,15 +1658,15 @@ proc removeSuffix*(s: var string, chars: set[char] = Newlines) {.
## See also:
## * `removePrefix proc<#removePrefix,string,set[char]>`_
runnableExamples:
var userInput = "Hello World!*~\r\n"
userInput.removeSuffix
doAssert userInput == "Hello World!*~"
userInput.removeSuffix({'~', '*'})
doAssert userInput == "Hello World!"
var userInput = "Hello World!*~\r\n"
userInput.removeSuffix
doAssert userInput == "Hello World!*~"
userInput.removeSuffix({'~', '*'})
doAssert userInput == "Hello World!"
var otherInput = "Hello!?!"
otherInput.removeSuffix({'!', '?'})
doAssert otherInput == "Hello"
var otherInput = "Hello!?!"
otherInput.removeSuffix({'!', '?'})
doAssert otherInput == "Hello"
if s.len == 0: return
var last = s.high
@@ -1720,7 +1725,7 @@ proc addSep*(dest: var string, sep = ", ", startLen: Natural = 0)
runnableExamples:
var arr = "["
for x in items([2, 3, 5, 7, 11]):
addSep(arr, startLen=len("["))
addSep(arr, startLen = len("["))
add(arr, $x)
add(arr, "]")
doAssert arr == "[2, 3, 5, 7, 11]"
@@ -1819,7 +1824,7 @@ proc find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int
##
## Searching is case-sensitive. If `sub` is not in `s`, -1 is returned.
let
last = if last==0: s.high else: last
last = if last == 0: s.high else: last
subLast = sub.len - 1
if subLast == -1:
@@ -1842,7 +1847,7 @@ proc find*(a: SkipTable, s, sub: string, start: Natural = 0, last = 0): int
when not (defined(js) or defined(nimdoc) or defined(nimscript)):
proc c_memchr(cstr: pointer, c: char, n: csize): pointer {.
importc: "memchr", header: "<string.h>" .}
importc: "memchr", header: "<string.h>".}
const hasCStringBuiltin = true
else:
const hasCStringBuiltin = false
@@ -1859,7 +1864,7 @@ proc find*(s: string, sub: char, start: Natural = 0, last = 0): int {.noSideEffe
## See also:
## * `rfind proc<#rfind,string,char,int,int>`_
## * `replace proc<#replace,string,char,char>`_
let last = if last==0: s.high else: last
let last = if last == 0: s.high else: last
when nimvm:
for i in int(start)..last:
if sub == s[i]: return i
@@ -1887,7 +1892,7 @@ proc find*(s: string, chars: set[char], start: Natural = 0, last = 0): int {.noS
## See also:
## * `rfind proc<#rfind,string,set[char],int,int>`_
## * `multiReplace proc<#multiReplace,string,varargs[]>`_
let last = if last==0: s.high else: last
let last = if last == 0: s.high else: last
for i in int(start)..last:
if s[i] in chars: return i
return -1
@@ -2140,7 +2145,8 @@ proc replaceWord*(s, sub: string, by = ""): string {.noSideEffect,
# copy the rest:
add result, substr(s, i)
proc multiReplace*(s: string, replacements: varargs[(string, string)]): string {.noSideEffect.} =
proc multiReplace*(s: string, replacements: varargs[(string, string)]):
string {.noSideEffect.} =
## Same as replace, but specialized for doing multiple replacements in a single
## pass through the input string.
##
@@ -2246,7 +2252,7 @@ proc unescape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect,
of 'x':
inc i, 2
var c: int
i += parseutils.parseHex(s, c, i, maxLen=2)
i += parseutils.parseHex(s, c, i, maxLen = 2)
result.add(chr(c))
dec i, 2
of '\\':
@@ -2416,8 +2422,8 @@ proc trimZeros*(x: var string) {.noSideEffect.} =
type
# NOTE(review): the two members are listed twice because this text is a
# rendered diff (old comment alignment first, new alignment second).
BinaryPrefixMode* = enum ## The different names for binary prefixes.
bpIEC, ## use the IEC/ISO standard prefixes such as kibi (e.g. ``KiB``)
bpColloquial ## use the colloquial kilo, mega etc. (e.g. ``kB``)
bpIEC, ## use the IEC/ISO standard prefixes such as kibi (e.g. ``KiB``)
bpColloquial ## use the colloquial kilo, mega etc. (e.g. ``kB``)
proc formatSize*(bytes: int64,
decimalSep = '.',
@@ -2437,10 +2443,10 @@ proc formatSize*(bytes: int64,
runnableExamples:
doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB"
doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
doAssert formatSize(4096, includeSpace=true) == "4 KiB"
doAssert formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB"
doAssert formatSize(4096, includeSpace = true) == "4 KiB"
doAssert formatSize(4096, prefix = bpColloquial, includeSpace = true) == "4 kB"
doAssert formatSize(4096) == "4KiB"
doAssert formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB"
doAssert formatSize(5_378_934, prefix = bpColloquial, decimalSep = ',') == "5,13MB"
const iecPrefixes = ["", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"]
const collPrefixes = ["", "k", "M", "G", "T", "P", "E", "Z", "Y"]
@@ -2467,7 +2473,8 @@ proc formatSize*(bytes: int64,
break
# xb has the integer number for the latest value; index should be correct
fbytes = bytes.float / (1'i64 shl (matchedIndex*10)).float
result = formatFloat(fbytes, format=ffDecimal, precision=3, decimalSep=decimalSep)
result = formatFloat(fbytes, format = ffDecimal, precision = 3,
decimalSep = decimalSep)
result.trimZeros()
if includeSpace:
result &= " "
@@ -2551,7 +2558,8 @@ proc formatEng*(f: BiggestFloat,
## Get the SI prefix for a given exponent
##
## Assumes exponent is a multiple of 3; returns ' ' if no prefix found
const siPrefixes = ['a','f','p','n','u','m',' ','k','M','G','T','P','E']
const siPrefixes = ['a', 'f', 'p', 'n', 'u', 'm', ' ', 'k', 'M', 'G', 'T',
'P', 'E']
var index: int = (exp div 3) + 6
result = ' '
if index in low(siPrefixes)..high(siPrefixes):
@@ -2564,7 +2572,8 @@ proc formatEng*(f: BiggestFloat,
if absolute == 0.0:
# Simple case: just format it and force the exponent to 0
exponent = 0
result = significand.formatBiggestFloat(ffDecimal, precision, decimalSep='.')
result = significand.formatBiggestFloat(ffDecimal, precision,
decimalSep = '.')
else:
# Find the best exponent that's a multiple of 3
fexponent = floor(log10(absolute))
@@ -2579,7 +2588,8 @@ proc formatEng*(f: BiggestFloat,
significand *= 0.001
fexponent += 3
# Components of the result:
result = significand.formatBiggestFloat(ffDecimal, precision, decimalSep='.')
result = significand.formatBiggestFloat(ffDecimal, precision,
decimalSep = '.')
exponent = fexponent.int()
splitResult = result.split('.')
@@ -2952,7 +2962,8 @@ proc isNilOrEmpty*(s: string): bool {.noSideEffect, procvar, rtl,
## Checks if `s` is nil or empty.
result = len(s) == 0
proc isNilOrWhitespace*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nsuIsNilOrWhitespace".} =
proc isNilOrWhitespace*(s: string): bool {.noSideEffect, procvar, rtl,
extern: "nsuIsNilOrWhitespace".} =
## Checks if `s` is nil or consists entirely of whitespace characters.
result = true
for c in s:
@@ -3124,11 +3135,11 @@ when isMainModule:
proc nonStaticTests =
doAssert formatBiggestFloat(1234.567, ffDecimal, -1) == "1234.567000"
when not defined(js):
doAssert formatBiggestFloat(1234.567, ffDecimal, 0) == "1235." # <=== bug 8242
doAssert formatBiggestFloat(1234.567, ffDecimal, 0) == "1235." # <=== bug 8242
doAssert formatBiggestFloat(1234.567, ffDecimal, 1) == "1234.6"
doAssert formatBiggestFloat(0.00000000001, ffDecimal, 11) == "0.00000000001"
doAssert formatBiggestFloat(0.00000000001, ffScientific, 1, ',') in
["1,0e-11", "1,0e-011"]
["1,0e-11", "1,0e-011"]
# bug #6589
when not defined(js):
doAssert formatFloat(123.456, ffScientific, precision = -1) == "1.234560e+02"
@@ -3138,41 +3149,45 @@ when isMainModule:
block: # formatSize tests
when not defined(js):
doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB" # <=== bug #8231
doAssert formatSize((1'i64 shl 31) + (300'i64 shl 20)) == "2.293GiB" # <=== bug #8231
doAssert formatSize((2.234*1024*1024).int) == "2.234MiB"
doAssert formatSize(4096) == "4KiB"
doAssert formatSize(4096, prefix=bpColloquial, includeSpace=true) == "4 kB"
doAssert formatSize(4096, includeSpace=true) == "4 KiB"
doAssert formatSize(5_378_934, prefix=bpColloquial, decimalSep=',') == "5,13MB"
doAssert formatSize(4096, prefix = bpColloquial, includeSpace = true) == "4 kB"
doAssert formatSize(4096, includeSpace = true) == "4 KiB"
doAssert formatSize(5_378_934, prefix = bpColloquial, decimalSep = ',') == "5,13MB"
block: # formatEng tests
doAssert formatEng(0, 2, trim=false) == "0.00"
doAssert formatEng(0, 2, trim = false) == "0.00"
doAssert formatEng(0, 2) == "0"
doAssert formatEng(53, 2, trim=false) == "53.00"
doAssert formatEng(0.053, 2, trim=false) == "53.00e-3"
doAssert formatEng(0.053, 4, trim=false) == "53.0000e-3"
doAssert formatEng(0.053, 4, trim=true) == "53e-3"
doAssert formatEng(53, 2, trim = false) == "53.00"
doAssert formatEng(0.053, 2, trim = false) == "53.00e-3"
doAssert formatEng(0.053, 4, trim = false) == "53.0000e-3"
doAssert formatEng(0.053, 4, trim = true) == "53e-3"
doAssert formatEng(0.053, 0) == "53e-3"
doAssert formatEng(52731234) == "52.731234e6"
doAssert formatEng(-52731234) == "-52.731234e6"
doAssert formatEng(52731234, 1) == "52.7e6"
doAssert formatEng(-52731234, 1) == "-52.7e6"
doAssert formatEng(52731234, 1, decimalSep=',') == "52,7e6"
doAssert formatEng(-52731234, 1, decimalSep=',') == "-52,7e6"
doAssert formatEng(52731234, 1, decimalSep = ',') == "52,7e6"
doAssert formatEng(-52731234, 1, decimalSep = ',') == "-52,7e6"
doAssert formatEng(4100, siPrefix=true, unit="V") == "4.1 kV"
doAssert formatEng(4.1, siPrefix=true, unit="V", useUnitSpace=true) == "4.1 V"
doAssert formatEng(4.1, siPrefix=true) == "4.1" # Note lack of space
doAssert formatEng(4100, siPrefix=true) == "4.1 k"
doAssert formatEng(4.1, siPrefix=true, unit="", useUnitSpace=true) == "4.1 " # Includes space
doAssert formatEng(4100, siPrefix=true, unit="") == "4.1 k"
doAssert formatEng(4100, siPrefix = true, unit = "V") == "4.1 kV"
doAssert formatEng(4.1, siPrefix = true, unit = "V",
useUnitSpace = true) == "4.1 V"
doAssert formatEng(4.1, siPrefix = true) == "4.1" # Note lack of space
doAssert formatEng(4100, siPrefix = true) == "4.1 k"
doAssert formatEng(4.1, siPrefix = true, unit = "",
useUnitSpace = true) == "4.1 " # Includes space
doAssert formatEng(4100, siPrefix = true, unit = "") == "4.1 k"
doAssert formatEng(4100) == "4.1e3"
doAssert formatEng(4100, unit="V", useUnitSpace=true) == "4.1e3 V"
doAssert formatEng(4100, unit="", useUnitSpace=true) == "4.1e3 "
doAssert formatEng(4100, unit = "V", useUnitSpace = true) == "4.1e3 V"
doAssert formatEng(4100, unit = "", useUnitSpace = true) == "4.1e3 "
# Don't use SI prefix as number is too big
doAssert formatEng(3.1e22, siPrefix=true, unit="a", useUnitSpace=true) == "31e21 a"
doAssert formatEng(3.1e22, siPrefix = true, unit = "a",
useUnitSpace = true) == "31e21 a"
# Don't use SI prefix as number is too small
doAssert formatEng(3.1e-25, siPrefix=true, unit="A", useUnitSpace=true) == "310e-27 A"
doAssert formatEng(3.1e-25, siPrefix = true, unit = "A",
useUnitSpace = true) == "310e-27 A"
proc staticTests =
doAssert align("abc", 4) == " abc"
@@ -3218,7 +3233,7 @@ when isMainModule:
doAssert count("foofoofoo", "foofoo") == 1
doAssert count("foofoofoo", "foofoo", overlapping = true) == 2
doAssert count("foofoofoo", 'f') == 3
doAssert count("foofoofoobar", {'f','b'}) == 4
doAssert count("foofoofoobar", {'f', 'b'}) == 4
doAssert strip(" foofoofoo ") == "foofoofoo"
doAssert strip("sfoofoofoos", chars = {'s'}) == "foofoofoo"
@@ -3232,7 +3247,8 @@ when isMainModule:
doAssert " foo\n bar".indent(4, "Q") == "QQQQ foo\nQQQQ bar"
doAssert "abba".multiReplace(("a", "b"), ("b", "a")) == "baab"
doAssert "Hello World.".multiReplace(("ello", "ELLO"), ("World.", "PEOPLE!")) == "HELLO PEOPLE!"
doAssert "Hello World.".multiReplace(("ello", "ELLO"), ("World.",
"PEOPLE!")) == "HELLO PEOPLE!"
doAssert "aaaa".multiReplace(("a", "aa"), ("aa", "bb")) == "aaaaaaaa"
doAssert isAlphaAscii('r')
@@ -3268,13 +3284,14 @@ when isMainModule:
doAssert(not isUpperAscii('5'))
doAssert(not isUpperAscii('%'))
doAssert rsplit("foo bar", seps=Whitespace) == @["foo", "bar"]
doAssert rsplit(" foo bar", seps=Whitespace, maxsplit=1) == @[" foo", "bar"]
doAssert rsplit(" foo bar ", seps=Whitespace, maxsplit=1) == @[" foo bar", ""]
doAssert rsplit(":foo:bar", sep=':') == @["", "foo", "bar"]
doAssert rsplit(":foo:bar", sep=':', maxsplit=2) == @["", "foo", "bar"]
doAssert rsplit(":foo:bar", sep=':', maxsplit=3) == @["", "foo", "bar"]
doAssert rsplit("foothebar", sep="the") == @["foo", "bar"]
doAssert rsplit("foo bar", seps = Whitespace) == @["foo", "bar"]
doAssert rsplit(" foo bar", seps = Whitespace, maxsplit = 1) == @[" foo", "bar"]
doAssert rsplit(" foo bar ", seps = Whitespace, maxsplit = 1) == @[
" foo bar", ""]
doAssert rsplit(":foo:bar", sep = ':') == @["", "foo", "bar"]
doAssert rsplit(":foo:bar", sep = ':', maxsplit = 2) == @["", "foo", "bar"]
doAssert rsplit(":foo:bar", sep = ':', maxsplit = 3) == @["", "foo", "bar"]
doAssert rsplit("foothebar", sep = "the") == @["foo", "bar"]
doAssert(unescape(r"\x013", "", "") == "\x013")
@@ -3315,16 +3332,17 @@ bar
let s2 = ":this;is;an:example;;"
doAssert s.split() == @["", "this", "is", "an", "example", "", ""]
doAssert s2.split(seps={':', ';'}) == @["", "this", "is", "an", "example", "", ""]
doAssert s.split(maxsplit=4) == @["", "this", "is", "an", "example "]
doAssert s.split(' ', maxsplit=1) == @["", "this is an example "]
doAssert s.split(" ", maxsplit=4) == @["", "this", "is", "an", "example "]
doAssert s2.split(seps = {':', ';'}) == @["", "this", "is", "an", "example",
"", ""]
doAssert s.split(maxsplit = 4) == @["", "this", "is", "an", "example "]
doAssert s.split(' ', maxsplit = 1) == @["", "this is an example "]
doAssert s.split(" ", maxsplit = 4) == @["", "this", "is", "an", "example "]
doAssert s.splitWhitespace() == @["this", "is", "an", "example"]
doAssert s.splitWhitespace(maxsplit=1) == @["this", "is an example "]
doAssert s.splitWhitespace(maxsplit=2) == @["this", "is", "an example "]
doAssert s.splitWhitespace(maxsplit=3) == @["this", "is", "an", "example "]
doAssert s.splitWhitespace(maxsplit=4) == @["this", "is", "an", "example"]
doAssert s.splitWhitespace(maxsplit = 1) == @["this", "is an example "]
doAssert s.splitWhitespace(maxsplit = 2) == @["this", "is", "an example "]
doAssert s.splitWhitespace(maxsplit = 3) == @["this", "is", "an", "example "]
doAssert s.splitWhitespace(maxsplit = 4) == @["this", "is", "an", "example"]
block: # startsWith / endsWith char tests
var s = "abcdef"

View File

@@ -21,7 +21,7 @@
## * `encodings module <encodings.html>`_
{.deadCodeElim: on.} # dce option deprecated
{.deadCodeElim: on.} # dce option deprecated
include "system/inclrtl"
@@ -95,8 +95,8 @@ template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
# assert(uint(s[i+2]) shr 6 == 0b10)
if i <= s.len - 3:
result = Rune((uint(s[i]) and ones(4)) shl 12 or
(uint(s[i+1]) and ones(6)) shl 6 or
(uint(s[i+2]) and ones(6)))
(uint(s[i+1]) and ones(6)) shl 6 or
(uint(s[i+2]) and ones(6)))
when doInc: inc(i, 3)
else:
result = replRune
@@ -107,9 +107,9 @@ template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
# assert(uint(s[i+3]) shr 6 == 0b10)
if i <= s.len - 4:
result = Rune((uint(s[i]) and ones(3)) shl 18 or
(uint(s[i+1]) and ones(6)) shl 12 or
(uint(s[i+2]) and ones(6)) shl 6 or
(uint(s[i+3]) and ones(6)))
(uint(s[i+1]) and ones(6)) shl 12 or
(uint(s[i+2]) and ones(6)) shl 6 or
(uint(s[i+3]) and ones(6)))
when doInc: inc(i, 4)
else:
result = replRune
@@ -121,10 +121,10 @@ template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
# assert(uint(s[i+4]) shr 6 == 0b10)
if i <= s.len - 5:
result = Rune((uint(s[i]) and ones(2)) shl 24 or
(uint(s[i+1]) and ones(6)) shl 18 or
(uint(s[i+2]) and ones(6)) shl 12 or
(uint(s[i+3]) and ones(6)) shl 6 or
(uint(s[i+4]) and ones(6)))
(uint(s[i+1]) and ones(6)) shl 18 or
(uint(s[i+2]) and ones(6)) shl 12 or
(uint(s[i+3]) and ones(6)) shl 6 or
(uint(s[i+4]) and ones(6)))
when doInc: inc(i, 5)
else:
result = replRune
@@ -137,11 +137,11 @@ template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) =
# assert(uint(s[i+5]) shr 6 == 0b10)
if i <= s.len - 6:
result = Rune((uint(s[i]) and ones(1)) shl 30 or
(uint(s[i+1]) and ones(6)) shl 24 or
(uint(s[i+2]) and ones(6)) shl 18 or
(uint(s[i+3]) and ones(6)) shl 12 or
(uint(s[i+4]) and ones(6)) shl 6 or
(uint(s[i+5]) and ones(6)))
(uint(s[i+1]) and ones(6)) shl 24 or
(uint(s[i+2]) and ones(6)) shl 18 or
(uint(s[i+3]) and ones(6)) shl 12 or
(uint(s[i+4]) and ones(6)) shl 6 or
(uint(s[i+5]) and ones(6)))
when doInc: inc(i, 6)
else:
result = replRune
@@ -410,7 +410,7 @@ proc runeSubStr*(s: string, pos: int, len: int = int.high): string =
if e < 0:
result = ""
else:
result = s.substr(o, runeOffset(s, e-(rl+pos) , o)-1)
result = s.substr(o, runeOffset(s, e-(rl+pos), o)-1)
else:
result = s.substr(o, runeOffset(s, len, o)-1)
else:
@@ -626,7 +626,7 @@ template runeCheck(s, runeProc) =
i = 0
rune: Rune
while i < len(s) and result:
fastRuneAt(s, i, rune, doInc=true)
fastRuneAt(s, i, rune, doInc = true)
result = runeProc(rune) and result
proc isAlpha*(s: string): bool {.noSideEffect, procvar,
@@ -654,9 +654,9 @@ template convertRune(s, runeProc) =
resultIndex = 0
rune: Rune
while i < len(s):
fastRuneAt(s, i, rune, doInc=true)
fastRuneAt(s, i, rune, doInc = true)
rune = runeProc(rune)
fastToUTF8Copy(rune, result, resultIndex, doInc=true)
fastToUTF8Copy(rune, result, resultIndex, doInc = true)
proc toUpper*(s: string): string {.noSideEffect, procvar,
rtl, extern: "nuc$1Str".} =
@@ -692,7 +692,7 @@ proc swapCase*(s: string): string {.noSideEffect, procvar,
rune = rune.toLower()
elif rune.isLower():
rune = rune.toUpper()
fastToUTF8Copy(rune, result, resultIndex, doInc=true)
fastToUTF8Copy(rune, result, resultIndex, doInc = true)
proc capitalize*(s: string): string {.noSideEffect, procvar,
rtl, extern: "nuc$1".} =
@@ -705,7 +705,7 @@ proc capitalize*(s: string): string {.noSideEffect, procvar,
var
rune: Rune
i = 0
fastRuneAt(s, i, rune, doInc=true)
fastRuneAt(s, i, rune, doInc = true)
result = $toUpper(rune) & substr(s, i)
proc translate*(s: string, replacements: proc(key: string): string): string {.
@@ -784,7 +784,7 @@ proc title*(s: string): string {.noSideEffect, procvar,
firstRune = false
elif rune.isWhiteSpace():
firstRune = true
fastToUTF8Copy(rune, result, resultIndex, doInc=true)
fastToUTF8Copy(rune, result, resultIndex, doInc = true)
iterator runes*(s: string): Rune =
@@ -807,7 +807,7 @@ iterator utf8*(s: string): string =
var o = 0
while o < s.len:
let n = runeLenAt(s, o)
yield s[o.. (o+n-1)]
yield s[o .. (o+n-1)]
o += n
proc toRunes*(s: string): seq[Rune] =
@@ -1039,8 +1039,8 @@ iterator split*(s: string, sep: Rune, maxsplit: int = -1): string =
##
splitCommon(s, sep, maxsplit, sep.size)
proc split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1): seq[string] {.
noSideEffect, rtl, extern: "nucSplitRunes".} =
proc split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1):
seq[string] {.noSideEffect, rtl, extern: "nucSplitRunes".} =
## The same as the `split iterator <#split.i,string,openArray[Rune],int>`_,
## but is a proc that returns a sequence of substrings.
accResult(split(s, seps, maxsplit))
@@ -1067,7 +1067,7 @@ proc strip*(s: string, leading = true, trailing = true,
doAssert a.strip(trailing = false) == "áñyóng "
var
sI = 0 ## starting index into string ``s``
sI = 0 ## starting index into string ``s``
eI = len(s) - 1 ## ending index into ``s``, where the last ``Rune`` starts
if leading:
var
@@ -1186,7 +1186,7 @@ template runeCaseCheck(s, runeProc, skipNonAlpha) =
rune: Rune
hasAtleastOneAlphaRune = false
while i < len(s):
fastRuneAt(s, i, rune, doInc=true)
fastRuneAt(s, i, rune, doInc = true)
if skipNonAlpha:
var runeIsAlpha = isAlpha(rune)
if not hasAtleastOneAlphaRune:
@@ -1232,9 +1232,8 @@ proc isUpper*(s: string, skipNonAlpha: bool): bool {.
## an empty string.
runeCaseCheck(s, isUpper, skipNonAlpha)
proc isTitle*(s: string): bool {.noSideEffect, procvar,
rtl, extern: "nuc$1Str",
deprecated: "Deprecated since version 0.20 since its semantics are unclear".}=
proc isTitle*(s: string): bool {.noSideEffect, procvar, rtl, extern: "nuc$1Str",
deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
## **Deprecated since version 0.20 since its semantics are unclear**
##
## Checks whether or not ``s`` is a unicode title.
@@ -1250,7 +1249,7 @@ proc isTitle*(s: string): bool {.noSideEffect, procvar,
var firstRune = true
while i < len(s) and result:
fastRuneAt(s, i, rune, doInc=true)
fastRuneAt(s, i, rune, doInc = true)
if not rune.isWhiteSpace() and firstRune:
result = rune.isUpper() and result
firstRune = false
@@ -1419,8 +1418,10 @@ when isMainModule:
let s2 = ":this;is;an:example;;"
let s3 = ":this×is×an:example××"
doAssert s.split() == @["", "this", "is", "an", "example", "", ""]
doAssert s2.split(seps = [':'.Rune, ';'.Rune]) == @["", "this", "is", "an", "example", "", ""]
doAssert s3.split(seps = [':'.Rune, "×".asRune]) == @["", "this", "is", "an", "example", "", ""]
doAssert s2.split(seps = [':'.Rune, ';'.Rune]) == @["", "this", "is", "an",
"example", "", ""]
doAssert s3.split(seps = [':'.Rune, "×".asRune]) == @["", "this", "is",
"an", "example", "", ""]
doAssert s.split(maxsplit = 4) == @["", "this", "is", "an", "example "]
doAssert s.split(' '.Rune, maxsplit = 1) == @["", "this is an example "]