diff --git a/bin/llc.exe b/bin/llc.exe index a7bb7cc25..ea04ad6ef 100644 Binary files a/bin/llc.exe and b/bin/llc.exe differ diff --git a/bin/lli.exe b/bin/lli.exe deleted file mode 100644 index 7691672dc..000000000 Binary files a/bin/lli.exe and /dev/null differ diff --git a/bin/opt.exe b/bin/opt.exe index 9b34c9ac8..c3ab932e4 100644 Binary files a/bin/opt.exe and b/bin/opt.exe differ diff --git a/build.bat b/build.bat index 17dbe7e5b..4223b855f 100644 --- a/build.bat +++ b/build.bat @@ -44,8 +44,8 @@ del *.ilk > NUL 2> NUL cl %compiler_settings% "src\main.c" ^ /link %linker_settings% -OUT:%exe_name% ^ - && odin build code/Jaze/src/main.odin - rem && odin run code/demo.odin + && odin run code/demo.odin + rem && odin build code/Jaze/src/main.odin rem && odin build_dll code/example.odin ^ rem odin run code/demo.odin diff --git a/core/strconv.odin b/core/strconv.odin index 53d890a6b..2d618855a 100644 --- a/core/strconv.odin +++ b/core/strconv.odin @@ -25,7 +25,6 @@ append_bool :: proc(buf: []byte, b: bool) -> string { } append_uint :: proc(buf: []byte, u: u64, base: int) -> string { - using Int_Flag; return append_bits(buf, u, base, false, digits, 0); } append_int :: proc(buf: []byte, i: i64, base: int) -> string { diff --git a/src/unicode.c b/src/unicode.c index d65f2f2ae..c1277c4da 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -6,6 +6,7 @@ #pragma warning(pop) + bool rune_is_letter(Rune r) { if ((r < 0x80 && gb_char_is_alpha(cast(char)r)) || r == '_') { diff --git a/src/utf8proc/utf8proc.c b/src/utf8proc/utf8proc.c index 7a6c984ac..c14bbe13f 100644 --- a/src/utf8proc/utf8proc.c +++ b/src/utf8proc/utf8proc.c @@ -166,24 +166,24 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(utf8proc_int32_t uc, ut if (uc < 0x00) { return 0; } else if (uc < 0x80) { - dst[0] = uc; + dst[0] = (utf8proc_uint8_t) uc; return 1; } else if (uc < 0x800) { - dst[0] = 0xC0 + (uc >> 6); - dst[1] = 0x80 + (uc & 0x3F); + dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6)); + dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); return 2; // Note: we allow encoding 0xd800-0xdfff here, so as not to change // the API, however, these are actually invalid in UTF-8 } else if (uc < 0x10000) { - dst[0] = 0xE0 + (uc >> 12); - dst[1] = 0x80 + ((uc >> 6) & 0x3F); - dst[2] = 0x80 + (uc & 0x3F); + dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12)); + dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F)); + dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); return 3; } else if (uc < 0x110000) { - dst[0] = 0xF0 + (uc >> 18); - dst[1] = 0x80 + ((uc >> 12) & 0x3F); - dst[2] = 0x80 + ((uc >> 6) & 0x3F); - dst[3] = 0x80 + (uc & 0x3F); + dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18)); + dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F)); + dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F)); + dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); return 4; } else return 0; } @@ -193,28 +193,28 @@ static utf8proc_ssize_t unsafe_encode_char(utf8proc_int32_t uc, utf8proc_uint8_t if (uc < 0x00) { return 0; } else if (uc < 0x80) { - dst[0] = uc; + dst[0] = (utf8proc_uint8_t)uc; return 1; } else if (uc < 0x800) { - dst[0] = 0xC0 + (uc >> 6); - dst[1] = 0x80 + (uc & 0x3F); + dst[0] = (utf8proc_uint8_t)(0xC0 + (uc >> 6)); + dst[1] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); return 2; } else if (uc == 0xFFFF) { - dst[0] = 0xFF; + dst[0] = (utf8proc_uint8_t)0xFF; return 1; } else if (uc == 0xFFFE) { - dst[0] = 0xFE; + dst[0] = (utf8proc_uint8_t)0xFE; return 1; } else if (uc < 0x10000) { - dst[0] = 0xE0 + (uc >> 12); - dst[1] = 0x80 + ((uc >> 6) & 0x3F); - dst[2] = 0x80 + (uc & 0x3F); + dst[0] = (utf8proc_uint8_t)(0xE0 + (uc >> 12)); + dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F)); + dst[2] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); return 3; } else if (uc < 0x110000) { - dst[0] = 0xF0 + (uc >> 18); - dst[1] = 0x80 + ((uc >> 12) & 0x3F); - dst[2] = 0x80 + ((uc >> 6) & 0x3F); - dst[3] = 0x80 + (uc & 0x3F); + dst[0] = (utf8proc_uint8_t)(0xF0 + (uc >> 18)); + dst[1] = (utf8proc_uint8_t)(0x80 + ((uc >> 12) & 0x3F)); + dst[2] = (utf8proc_uint8_t)(0x80 + ((uc >> 6) & 0x3F)); + dst[3] = (utf8proc_uint8_t)(0x80 + (uc & 0x3F)); return 4; } else return 0; } @@ -383,7 +383,7 @@ UTF8PROC_DLLEXPORT int utf8proc_charwidth(utf8proc_int32_t c) { } UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(utf8proc_int32_t c) { - return (utf8proc_category_t)utf8proc_get_property(c)->category; + return utf8proc_get_property(c)->category; } UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) { @@ -391,11 +391,9 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t c) { return s[utf8proc_category(c)]; } - - #define utf8proc_decompose_lump(replacement_uc) \ - return utf8proc_decompose_char((utf8proc_int32_t)(replacement_uc), dst, bufsize, \ - (utf8proc_option_t)((utf8proc_int32_t)options & ~UTF8PROC_LUMP), last_boundclass) + return utf8proc_decompose_char((replacement_uc), dst, bufsize, \ + options & ~UTF8PROC_LUMP, last_boundclass) UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) { const utf8proc_property_t *property; @@ -458,12 +456,12 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, category == UTF8PROC_CATEGORY_ME) return 0; } if (options & UTF8PROC_CASEFOLD) { - if ((utf8proc_int16_t)property->casefold_seqindex != UINT16_MAX) { + if (property->casefold_seqindex != UINT16_MAX) { return seqindex_write_char_decomposed(property->casefold_seqindex, dst, bufsize, options, last_boundclass); } } if (options & (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) { - if ((utf8proc_int16_t)property->decomp_seqindex != UINT16_MAX && + if (property->decomp_seqindex != UINT16_MAX && (!property->decomp_type || (options & UTF8PROC_COMPAT))) { return seqindex_write_char_decomposed(property->decomp_seqindex, dst, bufsize, options, last_boundclass); } @@ -485,6 +483,14 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options +) { + return utf8proc_decompose_custom(str, strlen, buffer, bufsize, options, NULL, NULL); +} + +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, + utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options, + utf8proc_custom_func custom_func, void *custom_data ) { /* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */ utf8proc_ssize_t wpos = 0; @@ -511,6 +517,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( rpos += utf8proc_iterate(str + rpos, strlen - rpos, &uc); if (uc < 0) return UTF8PROC_ERROR_INVALIDUTF8; } + if (custom_func != NULL) { + uc = custom_func(uc, custom_data); /* user-specified custom mapping */ + } decomp_result = utf8proc_decompose_char( uc, buffer + wpos, (bufsize > wpos) ? (bufsize - wpos) : 0, options, &boundclass @@ -545,9 +554,8 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( return wpos; } -UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { - /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored - ASSERT: 'buffer' has one spare byte of free space at the end! */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { + /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored */ if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) { utf8proc_ssize_t rpos; utf8proc_ssize_t wpos = 0; @@ -621,7 +629,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, starter_property = unsafe_get_property(*starter); } if (starter_property->comb_index < 0x8000 && - (utf8proc_int16_t)current_property->comb_index != UINT16_MAX && + current_property->comb_index != UINT16_MAX && current_property->comb_index >= 0x8000) { int sidx = starter_property->comb_index; int idx = (current_property->comb_index & 0x3FFF) - utf8proc_combinations[sidx]; @@ -655,6 +663,14 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, } length = wpos; } + return length; +} + +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { + /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored + ASSERT: 'buffer' has one spare byte of free space at the end! */ + length = utf8proc_normalize_utf32(buffer, length, options); + if (length < 0) return length; { utf8proc_ssize_t rpos, wpos = 0; utf8proc_int32_t uc; @@ -676,15 +692,22 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(utf8proc_int32_t *buffer, UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options +) { + return utf8proc_map_custom(str, strlen, dstptr, options, NULL, NULL); +} + +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options, + utf8proc_custom_func custom_func, void *custom_data ) { utf8proc_int32_t *buffer; utf8proc_ssize_t result; *dstptr = NULL; - result = utf8proc_decompose(str, strlen, NULL, 0, options); + result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data); if (result < 0) return result; buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1); if (!buffer) return UTF8PROC_ERROR_NOMEM; - result = utf8proc_decompose(str, strlen, buffer, result, options); + result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data); if (result < 0) { free(buffer); return result; @@ -705,29 +728,28 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str) { utf8proc_uint8_t *retval; - utf8proc_map(str, 0, &retval, (utf8proc_option_t)(UTF8PROC_NULLTERM | UTF8PROC_STABLE | - UTF8PROC_DECOMPOSE)); + utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | + UTF8PROC_DECOMPOSE); return retval; } UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str) { utf8proc_uint8_t *retval; - utf8proc_map(str, 0, &retval, (utf8proc_option_t)(UTF8PROC_NULLTERM | UTF8PROC_STABLE | - UTF8PROC_COMPOSE)); + utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | + UTF8PROC_COMPOSE); return retval; } UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str) { utf8proc_uint8_t *retval; - utf8proc_map(str, 0, &retval, (utf8proc_option_t)(UTF8PROC_NULLTERM | UTF8PROC_STABLE | - UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT)); + utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | + UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT); return retval; } UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str) { utf8proc_uint8_t *retval; - utf8proc_map(str, 0, &retval, (utf8proc_option_t)(UTF8PROC_NULLTERM | UTF8PROC_STABLE | - UTF8PROC_COMPOSE | UTF8PROC_COMPAT)); + utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | + UTF8PROC_COMPOSE | UTF8PROC_COMPAT); return retval; } - diff --git a/src/utf8proc/utf8proc.h b/src/utf8proc/utf8proc.h index 240fac66f..2dd8c1917 100644 --- a/src/utf8proc/utf8proc.h +++ b/src/utf8proc/utf8proc.h @@ -71,14 +71,15 @@ /** The MAJOR version number (increased when backwards API compatibility is broken). */ #define UTF8PROC_VERSION_MAJOR 2 /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */ -#define UTF8PROC_VERSION_MINOR 0 +#define UTF8PROC_VERSION_MINOR 1 /** The PATCH version (increased for fixes that do not change the API). */ -#define UTF8PROC_VERSION_PATCH 2 +#define UTF8PROC_VERSION_PATCH 0 /** @} */ #include -#include -#ifdef _MSC_VER + +#if defined(_MSC_VER) && _MSC_VER < 1800 +// MSVC prior to 2013 lacked stdbool.h and inttypes.h typedef signed char utf8proc_int8_t; typedef unsigned char utf8proc_uint8_t; typedef short utf8proc_int16_t; @@ -93,12 +94,18 @@ typedef int utf8proc_ssize_t; typedef unsigned int utf8proc_size_t; # endif # ifndef __cplusplus +// emulate C99 bool typedef unsigned char utf8proc_bool; -// enum {false, true}; +# ifndef __bool_true_false_are_defined +# define false 0 +# define true 1 +# define __bool_true_false_are_defined 1 +# endif # else typedef bool utf8proc_bool; # endif #else +# include # include # include typedef int8_t utf8proc_int8_t; @@ -108,22 +115,12 @@ typedef uint16_t utf8proc_uint16_t; typedef int32_t utf8proc_int32_t; typedef uint32_t utf8proc_uint32_t; typedef size_t utf8proc_size_t; -typedef ssize_t utf8proc_ssize_t; +typedef ptrdiff_t utf8proc_ssize_t; typedef bool utf8proc_bool; #endif #include -#ifdef _WIN32 -# ifdef UTF8PROC_EXPORTS -# define UTF8PROC_DLLEXPORT __declspec(dllexport) -# else -# define UTF8PROC_DLLEXPORT /*__declspec(dllimport)*/ -# endif -#elif __GNUC__ >= 4 -# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default"))) -#else -# define UTF8PROC_DLLEXPORT -#endif +#define UTF8PROC_DLLEXPORT #ifdef __cplusplus extern "C" { @@ -134,7 +131,7 @@ extern "C" { #endif #ifndef UINT16_MAX -# define UINT16_MAX ~(utf8proc_uint16_t)0 +# define UINT16_MAX 65535U #endif /** @@ -373,6 +370,13 @@ typedef enum { UTF8PROC_BOUNDCLASS_E_BASE_GAZ = 18, /**< E_BASE + GLUE_AFTER_ZJW */ } utf8proc_boundclass_t; +/** + * Function pointer type passed to @ref utf8proc_map_custom and + * @ref utf8proc_decompose_custom, which is used to specify a user-defined + * mapping of codepoints to be applied in conjunction with other mappings. + */ +typedef utf8proc_int32_t (*utf8proc_custom_func)(utf8proc_int32_t codepoint, void *data); + /** * Array containing the byte lengths of a UTF-8 encoded codepoint based * on the first byte. @@ -480,6 +484,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char( * `buffer` (which must contain at least `bufsize` entries). In case of * success, the number of codepoints written is returned; in case of an * error, a negative error code is returned (@ref utf8proc_errmsg). + * See @ref utf8proc_decompose_custom to supply additional transformations. * * If the number of written codepoints would be bigger than `bufsize`, the * required buffer size is returned, while the buffer will be overwritten with @@ -491,8 +496,20 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( ); /** - * Reencodes the sequence of `length` codepoints pointed to by `buffer` - * UTF-8 data in-place (i.e., the result is also stored in `buffer`). + * The same as @ref utf8proc_decompose, but also takes a `custom_func` mapping function + * that is called on each codepoint in `str` before any other transformations + * (along with a `custom_data` pointer that is passed through to `custom_func`). + * The `custom_func` argument is ignored if it is `NULL`. See also @ref utf8proc_map_custom. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, + utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options, + utf8proc_custom_func custom_func, void *custom_data +); + +/** + * Normalizes the sequence of `length` codepoints pointed to by `buffer` + * in-place (i.e., the result is also stored in `buffer`). * * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode. * @param length the length (in codepoints) of the buffer. @@ -507,9 +524,37 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( * the unicode versioning stability * * @return - * In case of success, the length (in bytes) of the resulting UTF-8 string is + * In case of success, the length (in codepoints) of the normalized UTF-32 string is * returned; otherwise, a negative error code is returned (@ref utf8proc_errmsg). * + * @warning The entries of the array pointed to by `str` have to be in the + * range `0x0000` to `0x10FFFF`. Otherwise, the program might crash! + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); + +/** + * Reencodes the sequence of `length` codepoints pointed to by `buffer` + * UTF-8 data in-place (i.e., the result is also stored in `buffer`). + * Can optionally normalize the UTF-32 sequence prior to UTF-8 conversion. + * + * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode. + * @param length the length (in codepoints) of the buffer. + * @param options a bitwise or (`|`) of one or more of the following flags: + * - @ref UTF8PROC_NLF2LS - convert LF, CRLF, CR and NEL into LS + * - @ref UTF8PROC_NLF2PS - convert LF, CRLF, CR and NEL into PS + * - @ref UTF8PROC_NLF2LF - convert LF, CRLF, CR and NEL into LF + * - @ref UTF8PROC_STRIPCC - strip or convert all non-affected control characters + * - @ref UTF8PROC_COMPOSE - try to combine decomposed codepoints into composite + * codepoints + * - @ref UTF8PROC_STABLE - prohibit combining characters that would violate + * the unicode versioning stability + * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster + * + * @return + * In case of success, the length (in bytes) of the resulting nul-terminated + * UTF-8 string is returned; otherwise, a negative error code is returned + * (@ref utf8proc_errmsg). + * * @warning The amount of free space pointed to by `buffer` must * exceed the amount of the input data by one byte, and the * entries of the array pointed to by `str` have to be in the @@ -595,7 +640,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi * in any case the result will be NULL terminated (though it might * contain NULL characters with the string if `str` contained NULL * characters). Other flags in the `options` field are passed to the - * functions defined above, and regarded as described. + * functions defined above, and regarded as described. See also + * @ref utfproc_map_custom to supply a custom codepoint transformation. * * In case of success the length of the new string is returned, * otherwise a negative error code is returned. @@ -607,6 +653,17 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options ); +/** + * Like @ref utf8proc_map, but also takes a `custom_func` mapping function + * that is called on each codepoint in `str` before any other transformations + * (along with a `custom_data` pointer that is passed through to `custom_func`). + * The `custom_func` argument is ignored if it is `NULL`. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options, + utf8proc_custom_func custom_func, void *custom_data +); + /** @name Unicode normalization * * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC @@ -619,9 +676,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str); /** NFC normalization (@ref UTF8PROC_COMPOSE). */ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str); -/** NFD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */ +/** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str); -/** NFD normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */ +/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str); /** @} */ @@ -630,4 +687,3 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str); #endif #endif - diff --git a/src/utf8proc/utf8proc_data.c b/src/utf8proc/utf8proc_data.c index dc44f8125..defd77303 100644 --- a/src/utf8proc/utf8proc_data.c +++ b/src/utf8proc/utf8proc_data.c @@ -1,7 +1,4 @@ -#pragma warning(push) -#pragma warning(disable: 4838) - -const utf8proc_uint16_t utf8proc_sequences[] = { +static const utf8proc_uint16_t utf8proc_sequences[] = { 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, @@ -1179,7 +1176,7 @@ const utf8proc_uint16_t utf8proc_sequences[] = { 56603, 55354, 56604, 55354, 56605, 55354, 56606, 55354, 56607, 55354, 56608, 55354, 56609, }; -const utf8proc_uint16_t utf8proc_stage1table[] = { +static const utf8proc_uint16_t utf8proc_stage1table[] = { 0, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584, 3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, @@ -1726,7 +1723,7 @@ const utf8proc_uint16_t utf8proc_stage1table[] = { 18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432, 38656, }; -const utf8proc_uint16_t utf8proc_stage2table[] = { +static const utf8proc_uint16_t utf8proc_stage2table[] = { 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 3, 5, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -5899,7 +5896,7 @@ const utf8proc_uint16_t utf8proc_stage2table[] = { 540, 540, 540, 1180, 0, 0, 0, 0, 0, 1154, 1154, 1154, 1154, 1154, 1154, 1154, 1154, 1154, 1154, 0, 0, 0, 0, 1103, - 1158, 0, 0, 0, 0, 0, 0, 0, + 1103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -6593,7 +6590,7 @@ const utf8proc_uint16_t utf8proc_stage2table[] = { 3984, 3984, 3984, 3984, 3984, 3984, 3984, 0, 0, }; -const utf8proc_property_t utf8proc_properties[] = { +static const utf8proc_property_t utf8proc_properties[] = { {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 0, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_CC, 0, UTF8PROC_BIDI_CLASS_BN, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, true, false, true, 0, 0, UTF8PROC_BOUNDCLASS_CONTROL}, {UTF8PROC_CATEGORY_CC, 0, UTF8PROC_BIDI_CLASS_BN, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, true, 0, 0, UTF8PROC_BOUNDCLASS_CONTROL}, @@ -7850,7 +7847,7 @@ const utf8proc_property_t utf8proc_properties[] = { {UTF8PROC_CATEGORY_MN, 122, UTF8PROC_BIDI_CLASS_NSM, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, false, 0, 0, UTF8PROC_BOUNDCLASS_EXTEND}, {UTF8PROC_CATEGORY_LO, 0, UTF8PROC_BIDI_CLASS_L, UTF8PROC_DECOMP_TYPE_COMPAT, 9523, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LO, 0, UTF8PROC_BIDI_CLASS_L, UTF8PROC_DECOMP_TYPE_COMPAT, 9525, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, - {UTF8PROC_CATEGORY_PO, 0, UTF8PROC_BIDI_CLASS_L, UTF8PROC_DECOMP_TYPE_NOBREAK, 1335, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, + {UTF8PROC_CATEGORY_PO, 0, UTF8PROC_BIDI_CLASS_L, UTF8PROC_DECOMP_TYPE_NOBREAK, 1335, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, false, 2, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_MN, 216, UTF8PROC_BIDI_CLASS_NSM, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, false, 0, 0, UTF8PROC_BOUNDCLASS_EXTEND}, {UTF8PROC_CATEGORY_PS, 0, UTF8PROC_BIDI_CLASS_ON, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, true, false, false, false, 2, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_PE, 0, UTF8PROC_BIDI_CLASS_ON, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, true, false, false, false, 2, 0, UTF8PROC_BOUNDCLASS_OTHER}, @@ -10478,7 +10475,7 @@ const utf8proc_property_t utf8proc_properties[] = { {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 1470, UINT16_MAX, 1470, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 1478, UINT16_MAX, 1478, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 5132, UINT16_MAX, 5132, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, - {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 1480, UINT16_MAX, 1480, UINT16_MAX, UINT16_MAX, false, false, false, false, 2, 0, UTF8PROC_BOUNDCLASS_OTHER}, + {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 1480, UINT16_MAX, 1480, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 5133, UINT16_MAX, 5133, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 5134, UINT16_MAX, 5134, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 1482, UINT16_MAX, 1482, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, @@ -12168,7 +12165,7 @@ const utf8proc_property_t utf8proc_properties[] = { {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 6787, UINT16_MAX, 6787, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 6789, UINT16_MAX, 6789, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 6791, UINT16_MAX, 6791, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, - {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 6793, UINT16_MAX, 6793, UINT16_MAX, UINT16_MAX, false, false, false, false, 2, 0, UTF8PROC_BOUNDCLASS_OTHER}, + {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 6793, UINT16_MAX, 6793, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 6795, UINT16_MAX, 6795, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 6797, UINT16_MAX, 6797, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LU, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, 6799, UINT16_MAX, 6799, UINT16_MAX, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, @@ -12204,7 +12201,7 @@ const utf8proc_property_t utf8proc_properties[] = { {UTF8PROC_CATEGORY_LL, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, UINT16_MAX, 9104, UINT16_MAX, 9104, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LL, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, UINT16_MAX, 9106, UINT16_MAX, 9106, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LL, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, UINT16_MAX, 9108, UINT16_MAX, 9108, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, - {UTF8PROC_CATEGORY_LL, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, UINT16_MAX, 9110, UINT16_MAX, 9110, UINT16_MAX, false, false, false, false, 2, 0, UTF8PROC_BOUNDCLASS_OTHER}, + {UTF8PROC_CATEGORY_LL, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, UINT16_MAX, 9110, UINT16_MAX, 9110, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LL, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, UINT16_MAX, 9112, UINT16_MAX, 9112, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LL, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, UINT16_MAX, 9114, UINT16_MAX, 9114, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, {UTF8PROC_CATEGORY_LL, 0, UTF8PROC_BIDI_CLASS_L, 0, UINT16_MAX, UINT16_MAX, 9116, UINT16_MAX, 9116, UINT16_MAX, false, false, false, false, 1, 0, UTF8PROC_BOUNDCLASS_OTHER}, @@ -13423,7 +13420,7 @@ const utf8proc_property_t utf8proc_properties[] = { {UTF8PROC_CATEGORY_LO, 0, UTF8PROC_BIDI_CLASS_L, 0, 7975, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, false, 2, 0, UTF8PROC_BOUNDCLASS_OTHER}, }; -const utf8proc_uint16_t utf8proc_combinations[] = { +static const utf8proc_uint16_t utf8proc_combinations[] = { 0, 46, 192, 193, 194, 195, 196, 197, 0, 256, 258, 260, 550, 461, 0, 0, 512, 514, 0, 0, 0, 0, 0, 0, 0, @@ -14386,5 +14383,3 @@ const utf8proc_uint16_t utf8proc_combinations[] = { 72, 75, 1, 53694, 1, 53696, }; - -#pragma warning(pop)