stdinc: Drastically improve SDL_StepUTF8() and make it a public API.

Fixes #10105.
This commit is contained in:
Ryan C. Gordon
2024-06-26 21:32:45 -04:00
parent 9b8c5f642f
commit a9cfcf6bde
8 changed files with 211 additions and 58 deletions

View File

@@ -1250,6 +1250,67 @@ extern SDL_DECLSPEC int SDLCALL SDL_strcasecmp(const char *str1, const char *str
*/
extern SDL_DECLSPEC int SDLCALL SDL_strncasecmp(const char *str1, const char *str2, size_t maxlen);
/**
* The Unicode REPLACEMENT CHARACTER codepoint (U+FFFD).
*
* SDL_StepUTF8() returns this codepoint when it encounters an encoding
* error in a UTF-8 string.
*
* This tends to render as something like a question mark in most places.
*
* \since This macro is available since SDL 3.0.0.
*
* \sa SDL_StepUTF8
*/
#define SDL_INVALID_UNICODE_CODEPOINT 0xFFFD
/**
* Decode a UTF-8 string, one Unicode codepoint at a time.
*
* This will return the first Unicode codepoint in the UTF-8 encoded
* string in `*pstr`, and then advance `*pstr` past any consumed bytes
* before returning.
*
* It will not access more than `*pslen` bytes from the string.
* `*pslen` will be adjusted, as well, subtracting the number of
* bytes consumed.
*
* `pslen` is allowed to be NULL, in which case the string _must_ be
* null-terminated, as the function will blindly read until it sees
* the null char.
*
* If `*pslen` is zero, it assumes the end of string is reached and
* returns a zero codepoint regardless of the contents of the string
* buffer.
*
* If the resulting codepoint is zero (a null terminator), or `*pslen`
* is zero, neither `*pstr` nor `*pslen` is modified.
*
* Generally this function is called in a loop until it returns zero,
* adjusting its parameters each iteration.
*
* If an invalid UTF-8 sequence is encountered, this function returns
* SDL_INVALID_UNICODE_CODEPOINT and advances the string/length by one
* byte (which is to say, a multibyte sequence might produce several
* SDL_INVALID_UNICODE_CODEPOINT returns before it syncs to the next
* valid UTF-8 sequence).
*
* Several things can generate invalid UTF-8 sequences, including
* overlong encodings, the use of UTF-16 surrogate values, and
* truncated data. Please refer to
* [RFC3629](https://www.ietf.org/rfc/rfc3629.txt) for details.
*
* \param pstr a pointer to a UTF-8 string pointer to be read and adjusted.
* \param pslen a pointer to the number of bytes in the string, to be read
* and adjusted. NULL is allowed.
* \returns the first Unicode codepoint in the string; zero at the end of
* the string, or SDL_INVALID_UNICODE_CODEPOINT on an encoding
* error.
*
* \threadsafety It is safe to call this function from any thread.
*
* \since This function is available since SDL 3.0.0.
*
* \sa SDL_INVALID_UNICODE_CODEPOINT
*/
extern SDL_DECLSPEC Uint32 SDLCALL SDL_StepUTF8(const char **pstr, size_t *pslen);
/*
* Formatted scanning and printing helpers.
*
* `fmt` is annotated as the format string in each declaration: argument 2
* for SDL_sscanf()/SDL_vsscanf() and argument 3 for SDL_snprintf(), per the
* SDL_SCANF_VARARG_FUNC / SDL_SCANF_VARARG_FUNCV / SDL_PRINTF_VARARG_FUNC
* annotations. SDL_snprintf()'s `text` buffer is annotated with
* SDL_OUT_Z_CAP(maxlen), tying its capacity to `maxlen`.
*
* NOTE(review): presumably these mirror the C library's sscanf(), vsscanf()
* and snprintf(); the implementations are not visible here -- confirm
* return-value and truncation semantics before relying on them.
*/
extern SDL_DECLSPEC int SDLCALL SDL_sscanf(const char *text, SDL_SCANF_FORMAT_STRING const char *fmt, ...) SDL_SCANF_VARARG_FUNC(2);
extern SDL_DECLSPEC int SDLCALL SDL_vsscanf(const char *text, SDL_SCANF_FORMAT_STRING const char *fmt, va_list ap) SDL_SCANF_VARARG_FUNCV(2);
extern SDL_DECLSPEC int SDLCALL SDL_snprintf(SDL_OUT_Z_CAP(maxlen) char *text, size_t maxlen, SDL_PRINTF_FORMAT_STRING const char *fmt, ... ) SDL_PRINTF_VARARG_FUNC(3);