mirror of
				https://github.com/raysan5/raylib.git
				synced 2025-10-26 12:27:01 +00:00 
			
		
		
		
	ADDED: TextToUtf8() -WIP-
RENAMED: TextCountCodepoints() -> GetCodepointsCount()
This commit is contained in:
		
							
								
								
									
										11
									
								
								src/raylib.h
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								src/raylib.h
									
									
									
									
									
								
							| @@ -1188,14 +1188,11 @@ RLAPI void DrawTextRecEx(Font font, const char *text, Rectangle rec, float fontS | |||||||
| RLAPI int MeasureText(const char *text, int fontSize);                                      // Measure string width for default font | RLAPI int MeasureText(const char *text, int fontSize);                                      // Measure string width for default font | ||||||
| RLAPI Vector2 MeasureTextEx(Font font, const char *text, float fontSize, float spacing);    // Measure string size for Font | RLAPI Vector2 MeasureTextEx(Font font, const char *text, float fontSize, float spacing);    // Measure string size for Font | ||||||
| RLAPI int GetGlyphIndex(Font font, int character);                                          // Get index position for a unicode character on font | RLAPI int GetGlyphIndex(Font font, int character);                                          // Get index position for a unicode character on font | ||||||
| RLAPI int GetNextCodepoint(const char *text, int *bytesProcessed);                          // Returns next codepoint in a UTF8 encoded string; 0x3f('?') is returned on failure |  | ||||||
| RLAPI int *GetCodepoints(const char *text, int *count);                                     // Get all codepoints in a string, codepoints count returned by parameters |  | ||||||
|  |  | ||||||
| // Text strings management functions | // Text strings management functions (no utf8 strings, only byte chars) | ||||||
| // NOTE: Some strings allocate memory internally for returned strings, just be careful! | // NOTE: Some strings allocate memory internally for returned strings, just be careful! | ||||||
| RLAPI bool TextIsEqual(const char *text1, const char *text2);                               // Check if two text string are equal | RLAPI bool TextIsEqual(const char *text1, const char *text2);                               // Check if two text string are equal | ||||||
| RLAPI unsigned int TextLength(const char *text);                                            // Get text length, checks for '\0' ending | RLAPI unsigned int TextLength(const char *text);                                            // Get text length, checks for '\0' ending | ||||||
| RLAPI unsigned int TextCountCodepoints(const char *text);                                   // Get total number of characters (codepoints) in a UTF8 encoded string |  | ||||||
| RLAPI const char *TextFormat(const char *text, ...);                                        // Text formatting with variables (sprintf style) | RLAPI const char *TextFormat(const char *text, ...);                                        // Text formatting with variables (sprintf style) | ||||||
| RLAPI const char *TextSubtext(const char *text, int position, int length);                  // Get a piece of a text string | RLAPI const char *TextSubtext(const char *text, int position, int length);                  // Get a piece of a text string | ||||||
| RLAPI char *TextReplace(char *text, const char *replace, const char *by);                   // Replace text string (memory should be freed!) | RLAPI char *TextReplace(char *text, const char *replace, const char *by);                   // Replace text string (memory should be freed!) | ||||||
| @@ -1208,6 +1205,12 @@ RLAPI const char *TextToUpper(const char *text);                      // Get upp | |||||||
| RLAPI const char *TextToLower(const char *text);                      // Get lower case version of provided string | RLAPI const char *TextToLower(const char *text);                      // Get lower case version of provided string | ||||||
| RLAPI const char *TextToPascal(const char *text);                     // Get Pascal case notation version of provided string | RLAPI const char *TextToPascal(const char *text);                     // Get Pascal case notation version of provided string | ||||||
| RLAPI int TextToInteger(const char *text);                            // Get integer value from text (negative values not supported) | RLAPI int TextToInteger(const char *text);                            // Get integer value from text (negative values not supported) | ||||||
|  | RLAPI const char *TextToUtf8(int codepoint, int *byteLength);         // Encode codepoint into utf8 text (char array length returned as parameter) | ||||||
|  |  | ||||||
|  | // UTF8 text strings management functions | ||||||
|  | RLAPI int *GetCodepoints(const char *text, int *count);               // Get all codepoints in a string, codepoints count returned by parameters | ||||||
|  | RLAPI int GetCodepointsCount(const char *text);                       // Get total number of characters (codepoints) in a UTF8 encoded string | ||||||
|  | RLAPI int GetNextCodepoint(const char *text, int *bytesProcessed);    // Returns next codepoint in a UTF8 encoded string; 0x3f('?') is returned on failure | ||||||
|  |  | ||||||
| //------------------------------------------------------------------------------------ | //------------------------------------------------------------------------------------ | ||||||
| // Basic 3d Shapes Drawing Functions (Module: models) | // Basic 3d Shapes Drawing Functions (Module: models) | ||||||
|   | |||||||
							
								
								
									
										349
									
								
								src/text.c
									
									
									
									
									
								
							
							
						
						
									
										349
									
								
								src/text.c
									
									
									
									
									
								
							| @@ -777,140 +777,6 @@ void DrawFPS(int posX, int posY) | |||||||
|     DrawText(TextFormat("%2i FPS", fps), posX, posY, 20, LIME); |     DrawText(TextFormat("%2i FPS", fps), posX, posY, 20, LIME); | ||||||
| } | } | ||||||
|  |  | ||||||
| // Returns next codepoint in a UTF8 encoded text, scanning until '\0' is found |  | ||||||
| // When a invalid UTF8 byte is encountered we exit as soon as possible and a '?'(0x3f) codepoint is returned |  | ||||||
| // Total number of bytes processed are returned as a parameter |  | ||||||
| // NOTE: the standard says U+FFFD should be returned in case of errors |  | ||||||
| // but that character is not supported by the default font in raylib |  | ||||||
| // TODO: optimize this code for speed!! |  | ||||||
| int GetNextCodepoint(const char *text, int *bytesProcessed) |  | ||||||
| { |  | ||||||
| /* |  | ||||||
|     UTF8 specs from https://www.ietf.org/rfc/rfc3629.txt |  | ||||||
|  |  | ||||||
|     Char. number range  |        UTF-8 octet sequence |  | ||||||
|       (hexadecimal)    |              (binary) |  | ||||||
|     --------------------+--------------------------------------------- |  | ||||||
|     0000 0000-0000 007F | 0xxxxxxx |  | ||||||
|     0000 0080-0000 07FF | 110xxxxx 10xxxxxx |  | ||||||
|     0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx |  | ||||||
|     0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |  | ||||||
| */ |  | ||||||
|  |  | ||||||
|     // NOTE: on decode errors we return as soon as possible |  | ||||||
|  |  | ||||||
|     int code = 0x3f;   // Codepoint (defaults to '?') |  | ||||||
|     int octet = (unsigned char)(text[0]); // The first UTF8 octet |  | ||||||
|     *bytesProcessed = 1; |  | ||||||
|  |  | ||||||
|     if (octet <= 0x7f) |  | ||||||
|     { |  | ||||||
|         // Only one octet (ASCII range x00-7F) |  | ||||||
|         code = text[0]; |  | ||||||
|     } |  | ||||||
|     else if ((octet & 0xe0) == 0xc0) |  | ||||||
|     { |  | ||||||
|         // Two octets |  | ||||||
|         // [0]xC2-DF    [1]UTF8-tail(x80-BF) |  | ||||||
|         unsigned char octet1 = text[1]; |  | ||||||
|  |  | ||||||
|         if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence |  | ||||||
|  |  | ||||||
|         if ((octet >= 0xc2) && (octet <= 0xdf)) |  | ||||||
|         { |  | ||||||
|             code = ((octet & 0x1f) << 6) | (octet1 & 0x3f); |  | ||||||
|             *bytesProcessed = 2; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else if ((octet & 0xf0) == 0xe0) |  | ||||||
|     { |  | ||||||
|         // Three octets |  | ||||||
|         unsigned char octet1 = text[1]; |  | ||||||
|         unsigned char octet2 = '\0'; |  | ||||||
|  |  | ||||||
|         if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence |  | ||||||
|  |  | ||||||
|         octet2 = text[2]; |  | ||||||
|  |  | ||||||
|         if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence |  | ||||||
|  |  | ||||||
|         /* |  | ||||||
|             [0]xE0    [1]xA0-BF       [2]UTF8-tail(x80-BF) |  | ||||||
|             [0]xE1-EC [1]UTF8-tail    [2]UTF8-tail(x80-BF) |  | ||||||
|             [0]xED    [1]x80-9F       [2]UTF8-tail(x80-BF) |  | ||||||
|             [0]xEE-EF [1]UTF8-tail    [2]UTF8-tail(x80-BF) |  | ||||||
|         */ |  | ||||||
|  |  | ||||||
|         if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) || |  | ||||||
|             ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { *bytesProcessed = 2; return code; } |  | ||||||
|  |  | ||||||
|         if ((octet >= 0xe0) && (0 <= 0xef)) |  | ||||||
|         { |  | ||||||
|             code = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f); |  | ||||||
|             *bytesProcessed = 3; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     else if ((octet & 0xf8) == 0xf0) |  | ||||||
|     { |  | ||||||
|         // Four octets |  | ||||||
|         if (octet > 0xf4) return code; |  | ||||||
|  |  | ||||||
|         unsigned char octet1 = text[1]; |  | ||||||
|         unsigned char octet2 = '\0'; |  | ||||||
|         unsigned char octet3 = '\0'; |  | ||||||
|  |  | ||||||
|         if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; }  // Unexpected sequence |  | ||||||
|  |  | ||||||
|         octet2 = text[2]; |  | ||||||
|  |  | ||||||
|         if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; }  // Unexpected sequence |  | ||||||
|  |  | ||||||
|         octet3 = text[3]; |  | ||||||
|  |  | ||||||
|         if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { *bytesProcessed = 4; return code; }  // Unexpected sequence |  | ||||||
|  |  | ||||||
|         /* |  | ||||||
|             [0]xF0       [1]x90-BF       [2]UTF8-tail  [3]UTF8-tail |  | ||||||
|             [0]xF1-F3    [1]UTF8-tail    [2]UTF8-tail  [3]UTF8-tail |  | ||||||
|             [0]xF4       [1]x80-8F       [2]UTF8-tail  [3]UTF8-tail |  | ||||||
|         */ |  | ||||||
|  |  | ||||||
|         if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) || |  | ||||||
|             ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { *bytesProcessed = 2; return code; } // Unexpected sequence |  | ||||||
|  |  | ||||||
|         if (octet >= 0xf0) |  | ||||||
|         { |  | ||||||
|             code = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f); |  | ||||||
|             *bytesProcessed = 4; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (code > 0x10ffff) code = 0x3f;     // Codepoints after U+10ffff are invalid |  | ||||||
|  |  | ||||||
|     return code; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Get all codepoints in a string, codepoints count returned by parameters |  | ||||||
| int *GetCodepoints(const char *text, int *count) |  | ||||||
| { |  | ||||||
|     static int codepoints[MAX_TEXT_UNICODE_CHARS] = { 0 }; |  | ||||||
|     memset(codepoints, 0, MAX_TEXT_UNICODE_CHARS*sizeof(int)); |  | ||||||
|  |  | ||||||
|     int bytesProcessed = 0; |  | ||||||
|     int textLength = strlen(text); |  | ||||||
|     int codepointsCount = 0; |  | ||||||
|  |  | ||||||
|     for (int i = 0; i < textLength; codepointsCount++) |  | ||||||
|     { |  | ||||||
|         codepoints[codepointsCount] = GetNextCodepoint(text + i, &bytesProcessed); |  | ||||||
|         i += bytesProcessed; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     *count = codepointsCount; |  | ||||||
|  |  | ||||||
|     return codepoints; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Draw text (using default font) | // Draw text (using default font) | ||||||
| // NOTE: fontSize work like in any drawing program but if fontSize is lower than font-base-size, then font-base-size is used | // NOTE: fontSize work like in any drawing program but if fontSize is lower than font-base-size, then font-base-size is used | ||||||
| // NOTE: chars spacing is proportional to fontSize | // NOTE: chars spacing is proportional to fontSize | ||||||
| @@ -1235,27 +1101,6 @@ unsigned int TextLength(const char *text) | |||||||
|     return length; |     return length; | ||||||
| } | } | ||||||
|  |  | ||||||
| // Returns total number of characters(codepoints) in a UTF8 encoded text, until '\0' is found |  | ||||||
| // NOTE: If an invalid UTF8 sequence is encountered a '?'(0x3f) codepoint is counted instead |  | ||||||
| unsigned int TextCountCodepoints(const char *text) |  | ||||||
| { |  | ||||||
|     unsigned int len = 0; |  | ||||||
|     char *ptr = (char *)&text[0]; |  | ||||||
|  |  | ||||||
|     while (*ptr != '\0') |  | ||||||
|     { |  | ||||||
|         int next = 0; |  | ||||||
|         int letter = GetNextCodepoint(ptr, &next); |  | ||||||
|  |  | ||||||
|         if (letter == 0x3f) ptr += 1; |  | ||||||
|         else ptr += next; |  | ||||||
|  |  | ||||||
|         len++; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return len; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| // Formatting of text with variables to 'embed' | // Formatting of text with variables to 'embed' | ||||||
| const char *TextFormat(const char *text, ...) | const char *TextFormat(const char *text, ...) | ||||||
| { | { | ||||||
| @@ -1531,6 +1376,200 @@ int TextToInteger(const char *text) | |||||||
|  |  | ||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
|  |  | ||||||
// Encode codepoint into utf8 text (char array length returned as parameter)
// Returns a pointer to an internal static buffer holding the encoded bytes followed by '\0';
// the buffer is overwritten on every call, so copy the result if it must outlive the next call
// byteLength receives the number of encoded bytes (1-4), or 0 when the codepoint
// can not be encoded (negative or above 0x10ffff), in which case the buffer is an empty string
const char *TextToUtf8(int codepoint, int *byteLength)
{
    static char utf8[6] = { 0 };
    int length = 0;

    // Clear bytes left by a previous (possibly longer) encoding,
    // this keeps the returned text properly '\0' terminated
    memset(utf8, 0, sizeof(utf8));

    if (codepoint < 0)
    {
        // Not a valid codepoint, nothing is encoded (length stays 0)
    }
    else if (codepoint <= 0x7f)
    {
        // One octet: 0xxxxxxx
        utf8[0] = (char)codepoint;
        length = 1;
    }
    else if (codepoint <= 0x7ff)
    {
        // Two octets: 110xxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 6) & 0x1f) | 0xc0);
        utf8[1] = (char)((codepoint & 0x3f) | 0x80);
        length = 2;
    }
    else if (codepoint <= 0xffff)
    {
        // Three octets: 1110xxxx 10xxxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 12) & 0x0f) | 0xe0);
        utf8[1] = (char)(((codepoint >>  6) & 0x3f) | 0x80);
        utf8[2] = (char)((codepoint & 0x3f) | 0x80);
        length = 3;
    }
    else if (codepoint <= 0x10ffff)
    {
        // Four octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 18) & 0x07) | 0xf0);
        utf8[1] = (char)(((codepoint >> 12) & 0x3f) | 0x80);
        utf8[2] = (char)(((codepoint >>  6) & 0x3f) | 0x80);
        utf8[3] = (char)((codepoint & 0x3f) | 0x80);
        length = 4;
    }

    *byteLength = length;

    return utf8;
}
|  |  | ||||||
|  |  | ||||||
|  | // Get all codepoints in a string, codepoints count returned by parameters | ||||||
|  | int *GetCodepoints(const char *text, int *count) | ||||||
|  | { | ||||||
|  |     static int codepoints[MAX_TEXT_UNICODE_CHARS] = { 0 }; | ||||||
|  |     memset(codepoints, 0, MAX_TEXT_UNICODE_CHARS*sizeof(int)); | ||||||
|  |  | ||||||
|  |     int bytesProcessed = 0; | ||||||
|  |     int textLength = strlen(text); | ||||||
|  |     int codepointsCount = 0; | ||||||
|  |  | ||||||
|  |     for (int i = 0; i < textLength; codepointsCount++) | ||||||
|  |     { | ||||||
|  |         codepoints[codepointsCount] = GetNextCodepoint(text + i, &bytesProcessed); | ||||||
|  |         i += bytesProcessed; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     *count = codepointsCount; | ||||||
|  |  | ||||||
|  |     return codepoints; | ||||||
|  | } | ||||||
|  |  | ||||||
// Returns total number of characters(codepoints) in a UTF8 encoded text, until '\0' is found
// NOTE: If an invalid UTF8 sequence is encountered a '?'(0x3f) codepoint is counted instead
int GetCodepointsCount(const char *text)
{
    int count = 0;              // Same type as the return value
    const char *ptr = text;     // Read-only cursor, text is never modified

    while (*ptr != '\0')
    {
        int next = 0;
        int letter = GetNextCodepoint(ptr, &next);

        // On '?' advance a single byte only: it is either a real one-byte '?' or a decode error,
        // and on errors the reported byte count could step over the '\0' terminator
        if (letter == 0x3f) ptr += 1;
        else ptr += next;

        count++;
    }

    return count;
}
|  |  | ||||||
|  |  | ||||||
// Returns next codepoint in a UTF8 encoded text, scanning until '\0' is found
// When an invalid UTF8 byte is encountered we exit as soon as possible and a '?'(0x3f) codepoint is returned
// Total number of bytes processed are returned as a parameter
// NOTE: the standard says U+FFFD should be returned in case of errors
// but that character is not supported by the default font in raylib
// NOTE: on a malformed multi-byte sequence the reported byte count can include the offending byte,
// which may be the '\0' terminator; callers should bound their iteration by the text length
// TODO: optimize this code for speed!!
int GetNextCodepoint(const char *text, int *bytesProcessed)
{
/*
    UTF8 specs from https://www.ietf.org/rfc/rfc3629.txt

    Char. number range  |        UTF-8 octet sequence
      (hexadecimal)    |              (binary)
    --------------------+---------------------------------------------
    0000 0000-0000 007F | 0xxxxxxx
    0000 0080-0000 07FF | 110xxxxx 10xxxxxx
    0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
    0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
    // NOTE: on decode errors we return as soon as possible

    int code = 0x3f;   // Codepoint (defaults to '?')
    int octet = (unsigned char)(text[0]); // The first UTF8 octet
    *bytesProcessed = 1;

    if (octet <= 0x7f)
    {
        // Only one octet (ASCII range x00-7F)
        code = octet;
    }
    else if ((octet & 0xe0) == 0xc0)
    {
        // Two octets
        // [0]xC2-DF    [1]UTF8-tail(x80-BF)
        unsigned char octet1 = (unsigned char)text[1];

        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence

        // Lead bytes xC0 and xC1 would be overlong encodings: '?' is kept and one byte is consumed
        if ((octet >= 0xc2) && (octet <= 0xdf))
        {
            code = ((octet & 0x1f) << 6) | (octet1 & 0x3f);
            *bytesProcessed = 2;
        }
    }
    else if ((octet & 0xf0) == 0xe0)
    {
        // Three octets
        unsigned char octet1 = (unsigned char)text[1];
        unsigned char octet2 = '\0';

        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence

        // Only read the third byte once the second one is known not to be the terminator
        octet2 = (unsigned char)text[2];

        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence

        /*
            [0]xE0    [1]xA0-BF       [2]UTF8-tail(x80-BF)
            [0]xE1-EC [1]UTF8-tail    [2]UTF8-tail(x80-BF)
            [0]xED    [1]x80-9F       [2]UTF8-tail(x80-BF)
            [0]xEE-EF [1]UTF8-tail    [2]UTF8-tail(x80-BF)
        */

        if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) ||
            ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { *bytesProcessed = 2; return code; }

        // Range check now compares the lead octet (previously the constant 0 was compared)
        if ((octet >= 0xe0) && (octet <= 0xef))
        {
            code = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f);
            *bytesProcessed = 3;
        }
    }
    else if ((octet & 0xf8) == 0xf0)
    {
        // Four octets
        if (octet > 0xf4) return code;      // Lead bytes xF5-F7 would encode values above U+10FFFF

        unsigned char octet1 = (unsigned char)text[1];
        unsigned char octet2 = '\0';
        unsigned char octet3 = '\0';

        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; }  // Unexpected sequence

        octet2 = (unsigned char)text[2];

        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; }  // Unexpected sequence

        octet3 = (unsigned char)text[3];

        if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { *bytesProcessed = 4; return code; }  // Unexpected sequence

        /*
            [0]xF0       [1]x90-BF       [2]UTF8-tail  [3]UTF8-tail
            [0]xF1-F3    [1]UTF8-tail    [2]UTF8-tail  [3]UTF8-tail
            [0]xF4       [1]x80-8F       [2]UTF8-tail  [3]UTF8-tail
        */

        if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) ||
            ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { *bytesProcessed = 2; return code; } // Unexpected sequence

        // Lead octet is within xF0-F4 at this point, no further range check required
        code = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f);
        *bytesProcessed = 4;
    }

    if (code > 0x10ffff) code = 0x3f;     // Codepoints after U+10ffff are invalid

    return code;
}
| //---------------------------------------------------------------------------------- | //---------------------------------------------------------------------------------- | ||||||
|  |  | ||||||
| //---------------------------------------------------------------------------------- | //---------------------------------------------------------------------------------- | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Ray
					Ray