From 9aa9007d8ff032a704c32b13d58e540fbcfb8bbe Mon Sep 17 00:00:00 2001
From: Ginger Bill
Date: Fri, 30 Sep 2016 23:35:06 +0100
Subject: [PATCH] Include other core libs.

---
 core/hash.odin | 313 +++++++++++++++++++++++++++++++++++++++++++++++++
 core/utf8.odin | 212 +++++++++++++++++++++++++++++++++++
 2 files changed, 525 insertions(+)
 create mode 100644 core/hash.odin
 create mode 100644 core/utf8.odin

diff --git a/core/hash.odin b/core/hash.odin
new file mode 100644
index 000000000..2f15f28b3
--- /dev/null
+++ b/core/hash.odin
@@ -0,0 +1,313 @@
+// crc32 runs a table-driven CRC over the `len` bytes at `data`, one byte per step.
+// The register starts as all ones and is inverted again before it is returned.
+crc32 :: proc(data: rawptr, len: int) -> u32 {
+	result := ~(0 as u32)
+	s := slice_ptr(data as ^u8, len)
+	for i := 0; i < len; i++ {
+		b := s[i] as u32
+		result = result>>8 ~ __CRC32_TABLE[(result ~ b) & 0xff]
+	}
+	return ~result
+}
+// crc64 is the 64-bit counterpart of crc32: the same byte-at-a-time update driven
+// by __CRC64_TABLE, with an all-ones start value and a final inversion.
+crc64 :: proc(data: rawptr, len: int) -> u64 {
+	result := ~(0 as u64)
+	s := slice_ptr(data as ^u8, len)
+	for i := 0; i < len; i++ {
+		b := s[i] as u64
+		result = result>>8 ~ __CRC64_TABLE[(result ~ b) & 0xff]
+	}
+	return ~result
+}
+
+// fnv32 is the 32-bit FNV-1 hash: multiply by the prime first, then xor in the byte.
+fnv32 :: proc(data: rawptr, len: int) -> u32 {
+	s := slice_ptr(data as ^u8, len)
+
+	h: u32 = 0x811c9dc5
+	for i := 0; i < len; i++ {
+		h = (h * 0x01000193) ~ s[i] as u32
+	}
+	return h
+}
+
+// fnv64 is the 64-bit FNV-1 hash: multiply by the prime first, then xor in the byte.
+fnv64 :: proc(data: rawptr, len: int) -> u64 {
+	s := slice_ptr(data as ^u8, len)
+
+	h: u64 = 0xcbf29ce484222325
+	for i := 0; i < len; i++ {
+		h = (h * 0x100000001b3) ~ s[i] as u64
+	}
+	return h
+}
+
+// fnv32a is the 32-bit FNV-1a hash: xor in the byte first, then multiply by the prime.
+fnv32a :: proc(data: rawptr, len: int) -> u32 {
+	s := slice_ptr(data as ^u8, len)
+
+	h: u32 = 0x811c9dc5
+	for i := 0; i < len; i++ {
+		h = (h ~ s[i] as u32) * 0x01000193
+	}
+	return h
+}
+
+// fnv64a is the 64-bit FNV-1a hash: xor in the byte first, then multiply by the prime.
+fnv64a :: proc(data: rawptr, len: int) -> u64 {
+	s := slice_ptr(data as ^u8, len)
+
+	h: u64 = 0xcbf29ce484222325
+	for i := 0; i < len; i++ {
+		h = (h ~ s[i] as u64) * 0x100000001b3
+	}
+	return h
+}
+
+
+// murmur64 hashes the `len` bytes at `data_` with a fixed seed and returns 64 bits.
+// Where `int` is 8 bytes wide the input is consumed as u64 words into one state;
+// otherwise it is consumed as u32 words into two 32-bit states that are joined
+// for the result. Bytes that do not fill a whole word are mixed in last.
+murmur64 :: proc(data_: rawptr, len: int) -> u64 {
+	SEED :: 0x9747b28c
+
+	if size_of(int) == 8 {
+		m :: 0xc6a4a7935bd1e995
+		r :: 47
+
+		h: u64 = SEED ~ (len as u64 * m)
+
+		data := slice_ptr(data_ as ^u64, len/size_of(u64))
+		// The tail switch below must see the bytes that follow the last whole
+		// word, so data2 starts there rather than at the beginning of the buffer.
+		data2 := slice_ptr(ptr_offset(data.data, data.count) as ^u8, len & 7)
+
+		for i := 0; i < data.count; i++ {
+			k := data[i]
+
+			k *= m
+			k ~= k>>r
+			k *= m
+
+			h ~= k
+			h *= m
+		}
+
+		match len & 7 {
+		case 7: h ~= data2[6] as u64 << 48; fallthrough
+		case 6: h ~= data2[5] as u64 << 40; fallthrough
+		case 5: h ~= data2[4] as u64 << 32; fallthrough
+		case 4: h ~= data2[3] as u64 << 24; fallthrough
+		case 3: h ~= data2[2] as u64 << 16; fallthrough
+		case 2: h ~= data2[1] as u64 << 8; fallthrough
+		case 1:
+			h ~= data2[0] as u64
+			h *= m
+		}
+
+		h ~= h>>r
+		h *= m
+		h ~= h>>r
+
+		return h
+	} else {
+		m :: 0x5bd1e995
+		r :: 24
+
+		h1: u32 = SEED as u32 ~ len as u32
+		h2: u32 = SEED >> 32
+
+		data := slice_ptr(data_ as ^u32, len/size_of(u32))
+
+		i := 0
+		for len >= 8 {
+			k1, k2: u32
+			k1 = data[i]; i++
+			k1 *= m
+			k1 ~= k1>>r
+			k1 *= m
+			h1 *= m
+			h1 ~= k1
+			len -= 4
+
+			k2 = data[i]; i++
+			k2 *= m
+			k2 ~= k2>>r
+			k2 *= m
+			h2 *= m
+			h2 ~= k2
+			len -= 4
+		}
+
+		if (len >= 4) {
+			k1: u32
+			k1 = data[i]; i++
+			k1 *= m
+			k1 ~= k1>>r
+			k1 *= m
+			h1 *= m
+			h1 ~= k1
+			len -= 4
+		}
+
+		data8 := slice_ptr(ptr_offset(data.data, i) as ^u8, 3) // NOTE(bill): This is unsafe
+
+		match len {
+		case 3: h2 ~= data8[2] as u32 << 16; fallthrough
+		case 2: h2 ~= data8[1] as u32 << 8; fallthrough
+		case 1:
+			h2 ~= data8[0] as u32
+			h2 *= m
+		}
+
+		h1 ~= h2>>18
+		h1 *= m
+		h2 ~= h1>>22
+		h2 *= m
+		h1 ~= h2>>17
+		h1 *= m
+		h2 ~= h1>>19
+		h2 *= m
+
+		h := (h1 as u64)<<32 | h2 as u64
+		return h
+	}
+}
+
+
+
+// Lookup table for crc32, indexed by (running crc ~ input byte) & 0xff.
+__CRC32_TABLE := [256]u32{
+	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
+	0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
+	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
+	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
+	0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
+	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
+	0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
+	0x3b6e20c8, 0x4c69105e, 0xd56041e4,
0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 
0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, +} +__CRC64_TABLE := [256]u64{ + 0x0000000000000000, 0x42f0e1eba9ea3693, 0x85e1c3d753d46d26, 0xc711223cfa3e5bb5, + 0x493366450e42ecdf, 0x0bc387aea7a8da4c, 0xccd2a5925d9681f9, 0x8e224479f47cb76a, + 0x9266cc8a1c85d9be, 0xd0962d61b56fef2d, 0x17870f5d4f51b498, 0x5577eeb6e6bb820b, + 0xdb55aacf12c73561, 0x99a54b24bb2d03f2, 0x5eb4691841135847, 0x1c4488f3e8f96ed4, + 0x663d78ff90e185ef, 0x24cd9914390bb37c, 0xe3dcbb28c335e8c9, 0xa12c5ac36adfde5a, + 0x2f0e1eba9ea36930, 0x6dfeff5137495fa3, 0xaaefdd6dcd770416, 0xe81f3c86649d3285, + 0xf45bb4758c645c51, 0xb6ab559e258e6ac2, 0x71ba77a2dfb03177, 0x334a9649765a07e4, + 0xbd68d2308226b08e, 0xff9833db2bcc861d, 0x388911e7d1f2dda8, 0x7a79f00c7818eb3b, + 0xcc7af1ff21c30bde, 0x8e8a101488293d4d, 0x499b3228721766f8, 0x0b6bd3c3dbfd506b, + 0x854997ba2f81e701, 0xc7b97651866bd192, 0x00a8546d7c558a27, 0x4258b586d5bfbcb4, + 0x5e1c3d753d46d260, 0x1cecdc9e94ace4f3, 0xdbfdfea26e92bf46, 0x990d1f49c77889d5, + 0x172f5b3033043ebf, 0x55dfbadb9aee082c, 0x92ce98e760d05399, 0xd03e790cc93a650a, + 0xaa478900b1228e31, 0xe8b768eb18c8b8a2, 0x2fa64ad7e2f6e317, 0x6d56ab3c4b1cd584, + 0xe374ef45bf6062ee, 0xa1840eae168a547d, 0x66952c92ecb40fc8, 0x2465cd79455e395b, + 0x3821458aada7578f, 0x7ad1a461044d611c, 
0xbdc0865dfe733aa9, 0xff3067b657990c3a, + 0x711223cfa3e5bb50, 0x33e2c2240a0f8dc3, 0xf4f3e018f031d676, 0xb60301f359dbe0e5, + 0xda050215ea6c212f, 0x98f5e3fe438617bc, 0x5fe4c1c2b9b84c09, 0x1d14202910527a9a, + 0x93366450e42ecdf0, 0xd1c685bb4dc4fb63, 0x16d7a787b7faa0d6, 0x5427466c1e109645, + 0x4863ce9ff6e9f891, 0x0a932f745f03ce02, 0xcd820d48a53d95b7, 0x8f72eca30cd7a324, + 0x0150a8daf8ab144e, 0x43a04931514122dd, 0x84b16b0dab7f7968, 0xc6418ae602954ffb, + 0xbc387aea7a8da4c0, 0xfec89b01d3679253, 0x39d9b93d2959c9e6, 0x7b2958d680b3ff75, + 0xf50b1caf74cf481f, 0xb7fbfd44dd257e8c, 0x70eadf78271b2539, 0x321a3e938ef113aa, + 0x2e5eb66066087d7e, 0x6cae578bcfe24bed, 0xabbf75b735dc1058, 0xe94f945c9c3626cb, + 0x676dd025684a91a1, 0x259d31cec1a0a732, 0xe28c13f23b9efc87, 0xa07cf2199274ca14, + 0x167ff3eacbaf2af1, 0x548f120162451c62, 0x939e303d987b47d7, 0xd16ed1d631917144, + 0x5f4c95afc5edc62e, 0x1dbc74446c07f0bd, 0xdaad56789639ab08, 0x985db7933fd39d9b, + 0x84193f60d72af34f, 0xc6e9de8b7ec0c5dc, 0x01f8fcb784fe9e69, 0x43081d5c2d14a8fa, + 0xcd2a5925d9681f90, 0x8fdab8ce70822903, 0x48cb9af28abc72b6, 0x0a3b7b1923564425, + 0x70428b155b4eaf1e, 0x32b26afef2a4998d, 0xf5a348c2089ac238, 0xb753a929a170f4ab, + 0x3971ed50550c43c1, 0x7b810cbbfce67552, 0xbc902e8706d82ee7, 0xfe60cf6caf321874, + 0xe224479f47cb76a0, 0xa0d4a674ee214033, 0x67c58448141f1b86, 0x253565a3bdf52d15, + 0xab1721da49899a7f, 0xe9e7c031e063acec, 0x2ef6e20d1a5df759, 0x6c0603e6b3b7c1ca, + 0xf6fae5c07d3274cd, 0xb40a042bd4d8425e, 0x731b26172ee619eb, 0x31ebc7fc870c2f78, + 0xbfc9838573709812, 0xfd39626eda9aae81, 0x3a28405220a4f534, 0x78d8a1b9894ec3a7, + 0x649c294a61b7ad73, 0x266cc8a1c85d9be0, 0xe17dea9d3263c055, 0xa38d0b769b89f6c6, + 0x2daf4f0f6ff541ac, 0x6f5faee4c61f773f, 0xa84e8cd83c212c8a, 0xeabe6d3395cb1a19, + 0x90c79d3fedd3f122, 0xd2377cd44439c7b1, 0x15265ee8be079c04, 0x57d6bf0317edaa97, + 0xd9f4fb7ae3911dfd, 0x9b041a914a7b2b6e, 0x5c1538adb04570db, 0x1ee5d94619af4648, + 0x02a151b5f156289c, 0x4051b05e58bc1e0f, 0x87409262a28245ba, 
0xc5b073890b687329, + 0x4b9237f0ff14c443, 0x0962d61b56fef2d0, 0xce73f427acc0a965, 0x8c8315cc052a9ff6, + 0x3a80143f5cf17f13, 0x7870f5d4f51b4980, 0xbf61d7e80f251235, 0xfd913603a6cf24a6, + 0x73b3727a52b393cc, 0x31439391fb59a55f, 0xf652b1ad0167feea, 0xb4a25046a88dc879, + 0xa8e6d8b54074a6ad, 0xea16395ee99e903e, 0x2d071b6213a0cb8b, 0x6ff7fa89ba4afd18, + 0xe1d5bef04e364a72, 0xa3255f1be7dc7ce1, 0x64347d271de22754, 0x26c49cccb40811c7, + 0x5cbd6cc0cc10fafc, 0x1e4d8d2b65facc6f, 0xd95caf179fc497da, 0x9bac4efc362ea149, + 0x158e0a85c2521623, 0x577eeb6e6bb820b0, 0x906fc95291867b05, 0xd29f28b9386c4d96, + 0xcedba04ad0952342, 0x8c2b41a1797f15d1, 0x4b3a639d83414e64, 0x09ca82762aab78f7, + 0x87e8c60fded7cf9d, 0xc51827e4773df90e, 0x020905d88d03a2bb, 0x40f9e43324e99428, + 0x2cffe7d5975e55e2, 0x6e0f063e3eb46371, 0xa91e2402c48a38c4, 0xebeec5e96d600e57, + 0x65cc8190991cb93d, 0x273c607b30f68fae, 0xe02d4247cac8d41b, 0xa2dda3ac6322e288, + 0xbe992b5f8bdb8c5c, 0xfc69cab42231bacf, 0x3b78e888d80fe17a, 0x7988096371e5d7e9, + 0xf7aa4d1a85996083, 0xb55aacf12c735610, 0x724b8ecdd64d0da5, 0x30bb6f267fa73b36, + 0x4ac29f2a07bfd00d, 0x08327ec1ae55e69e, 0xcf235cfd546bbd2b, 0x8dd3bd16fd818bb8, + 0x03f1f96f09fd3cd2, 0x41011884a0170a41, 0x86103ab85a2951f4, 0xc4e0db53f3c36767, + 0xd8a453a01b3a09b3, 0x9a54b24bb2d03f20, 0x5d45907748ee6495, 0x1fb5719ce1045206, + 0x919735e51578e56c, 0xd367d40ebc92d3ff, 0x1476f63246ac884a, 0x568617d9ef46bed9, + 0xe085162ab69d5e3c, 0xa275f7c11f7768af, 0x6564d5fde549331a, 0x279434164ca30589, + 0xa9b6706fb8dfb2e3, 0xeb46918411358470, 0x2c57b3b8eb0bdfc5, 0x6ea7525342e1e956, + 0x72e3daa0aa188782, 0x30133b4b03f2b111, 0xf7021977f9cceaa4, 0xb5f2f89c5026dc37, + 0x3bd0bce5a45a6b5d, 0x79205d0e0db05dce, 0xbe317f32f78e067b, 0xfcc19ed95e6430e8, + 0x86b86ed5267cdbd3, 0xc4488f3e8f96ed40, 0x0359ad0275a8b6f5, 0x41a94ce9dc428066, + 0xcf8b0890283e370c, 0x8d7be97b81d4019f, 0x4a6acb477bea5a2a, 0x089a2aacd2006cb9, + 0x14dea25f3af9026d, 0x562e43b4931334fe, 0x913f6188692d6f4b, 0xd3cf8063c0c759d8, + 
0x5dedc41a34bbeeb2, 0x1f1d25f19d51d821, 0xd80c07cd676f8394, 0x9afce626ce85b507,
+}
diff --git a/core/utf8.odin b/core/utf8.odin
new file mode 100644
index 000000000..ffe0b6111
--- /dev/null
+++ b/core/utf8.odin
@@ -0,0 +1,231 @@
+RUNE_ERROR :: #rune "\ufffd"     // returned by decode_rune for empty or malformed input
+RUNE_SELF  :: 0x80               // bytes below this value are complete one-byte runes
+MAX_RUNE   :: #rune "\U0010ffff" // largest value accepted by valid_rune and rune_size
+UTF_MAX    :: 4                  // most bytes encode_rune ever produces
+
+SURROGATE_MIN :: 0xd800
+SURROGATE_MAX :: 0xdfff
+
+
+// Accept_Range is the inclusive range allowed for the second byte of a sequence.
+Accept_Range :: struct {
+	lo, hi: u8
+}
+
+// accept_ranges is indexed by the high nibble of an accept_sizes entry.
+accept_ranges := [5]Accept_Range{
+	Accept_Range{0x80, 0xbf},
+	Accept_Range{0xa0, 0xbf},
+	Accept_Range{0x80, 0x9f},
+	Accept_Range{0x90, 0xbf},
+	Accept_Range{0x80, 0x8f},
+}
+
+// accept_sizes classifies every possible first byte of a sequence:
+//   0xf0      - a complete one-byte rune
+//   0xf1      - never valid as a first byte
+//   otherwise - low 3 bits give the sequence length, high nibble indexes accept_ranges
+accept_sizes := [256]u8{
+	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x00-0x0f
+	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x10-0x1f
+	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x20-0x2f
+	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x30-0x3f
+	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x40-0x4f
+	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x50-0x5f
+	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x60-0x6f
+	0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, // 0x70-0x7f
+
+	0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0x80-0x8f
+	0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0x90-0x9f
+	0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xa0-0xaf
+	0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xb0-0xbf
+	0xf1, 0xf1, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xc0-0xcf
+	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, // 0xd0-0xdf
+	0x13, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x23, 0x03, 0x03, // 0xe0-0xef
+	0x34, 0x04, 0x04, 0x04, 0x44, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, 0xf1, // 0xf0-0xff
+}
+
+// encode_rune returns the UTF-8 bytes for `r` together with how many of the four
+// buffer bytes are used. Surrogates and values above 0x10ffff are replaced by
+// U+FFFD, which is always emitted in its three-byte form.
+encode_rune :: proc(r: rune) -> ([4]byte, int) {
+	buf: [4]byte
+	i := r as u32
+	mask: byte : 0x3f
+	if i <= 1<<7-1 {
+		buf[0] = r as byte
+		return buf, 1
+	}
+	if i <= 1<<11-1 {
+		buf[0] = 0xc0 | (r>>6) as byte
+		buf[1] = 0x80 | (r) as byte & mask
+		return buf, 2
+	}
+
+	// Invalid or Surrogate range
+	if i > 0x0010ffff ||
+	   (0xd800 <= i && i <= 0xdfff) {
+		r = 0xfffd
+		i = 0xfffd // keep the size checks below in step with the replaced rune
+	}
+
+	if i <= 1<<16-1 {
+		buf[0] = 0xe0 | (r>>12) as byte
+		buf[1] = 0x80 | (r>>6) as byte & mask
+		buf[2] = 0x80 | (r) as byte & mask
+		return buf, 3
+	}
+
+	buf[0] = 0xf0 | (r>>18) as byte
+	buf[1] = 0x80 | (r>>12) as byte & mask
+	buf[2] = 0x80 | (r>>6) as byte & mask
+	buf[3] = 0x80 | (r) as byte & mask
+	return buf, 4
+}
+
+// decode_rune decodes the first rune of `s` and returns it with its width in bytes.
+// An empty string yields (RUNE_ERROR, 0); any malformed or truncated sequence
+// yields (RUNE_ERROR, 1).
+decode_rune :: proc(s: string) -> (rune, int) {
+	n := s.count
+	if n < 1 {
+		return RUNE_ERROR, 0
+	}
+	b0 := s[0]
+	x := accept_sizes[b0]
+	if x >= 0xf0 {
+		mask := (x as rune << 31) >> 31 // all zeros or all ones
+		return (b0 as rune) &~ mask | RUNE_ERROR&mask, 1
+	}
+	size := x & 7
+	ar := accept_ranges[x>>4]
+	if n < size as int {
+		return RUNE_ERROR, 1
+	}
+	b1 := s[1]
+	if b1 < ar.lo || ar.hi < b1 {
+		return RUNE_ERROR, 1
+	}
+
+	MASK_X :: 0b00111111
+	MASK_2 :: 0b00011111
+	MASK_3 :: 0b00001111
+	MASK_4 :: 0b00000111
+
+	if size == 2 {
+		return (b0&MASK_2) as rune <<6 | (b1&MASK_X) as rune, 2
+	}
+	b2 := s[2]
+	if b2 < 0x80 || 0xbf < b2 {
+		return RUNE_ERROR, 1
+	}
+	if size == 3 {
+		return (b0&MASK_3) as rune <<12 | (b1&MASK_X) as rune <<6 | (b2&MASK_X) as rune, 3
+	}
+	b3 := s[3]
+	if b3 < 0x80 || 0xbf < b3 {
+		return RUNE_ERROR, 1
+	}
+	return (b0&MASK_4) as rune <<18 | (b1&MASK_X) as rune <<12 | (b2&MASK_X) as rune <<6 | (b3&MASK_X) as rune, 4
+
+}
+
+
+// valid_rune reports whether `r` is non-negative, not a surrogate, and at most MAX_RUNE.
+valid_rune :: proc(r: rune) -> bool {
+	if r < 0 {
+		return false
+	} else if SURROGATE_MIN <= r && r <= SURROGATE_MAX {
+		return false
+	} else if r > MAX_RUNE {
+		return false
+	}
+	return true
+}
+
+// valid_string reports whether `s` consists solely of well-formed sequences.
+valid_string :: proc(s: string) -> bool {
+	n := s.count
+	for i := 0; i < n; {
+		si := s[i]
+		if si < RUNE_SELF { // ascii
+			i++
+			continue
+		}
+		x := accept_sizes[si]
+		if x == 0xf1 {
+			return false
+		}
+		size := (x & 7) as int
+		if i+size > n {
+			return false
+		}
+		ar := accept_ranges[x>>4]
+		if b := s[i+1]; b < ar.lo || ar.hi < b {
+			return false
+		} else if size == 2 {
+			// Okay
+		} else if b := s[i+2]; b < 0x80 || 0xbf < b {
+			return false
+		} else if size == 3 {
+			// Okay
+		} else if b := s[i+3]; b < 0x80 || 0xbf < b {
+			return false
+		}
+		i += size
+	}
+	return true
+}
+
+// rune_count returns the number of runes in `s`; each byte that does not start a
+// well-formed sequence is counted as one rune of its own.
+rune_count :: proc(s: string) -> int {
+	count := 0
+	n := s.count
+	for i := 0; i < n; count++ {
+		si := s[i]
+		if si < RUNE_SELF { // ascii
+			i++
+			continue
+		}
+		x := accept_sizes[si]
+		if x == 0xf1 {
+			i++
+			continue
+		}
+		size := (x & 7) as int
+		if i+size > n {
+			i++
+			continue
+		}
+		ar := accept_ranges[x>>4]
+		if b := s[i+1]; b < ar.lo || ar.hi < b {
+			size = 1
+		} else if size == 2 {
+			// Okay
+		} else if b := s[i+2]; b < 0x80 || 0xbf < b {
+			size = 1
+		} else if size == 3 {
+			// Okay
+		} else if b := s[i+3]; b < 0x80 || 0xbf < b {
+			size = 1
+		}
+		i += size
+	}
+	return count
+}
+
+
+// rune_size returns the number of bytes needed to encode `r`, or -1 when `r` is
+// negative, a surrogate, or greater than MAX_RUNE.
+rune_size :: proc(r: rune) -> int {
+	match {
+	case r < 0: return -1
+	case r <= 1<<7 - 1: return 1
+	case r <= 1<<11 - 1: return 2
+	case SURROGATE_MIN <= r && r <= SURROGATE_MAX: return -1
+	case r <= 1<<16 - 1: return 3
+	case r <= MAX_RUNE: return 4
+	}
+	return -1
+}