mirror of
https://github.com/neovim/neovim.git
synced 2025-09-28 05:58:33 +00:00
Merge pull request #30272 from bfredl/replace_emoji
fix(multibyte): handle backspace of wide clusters in replace mode
This commit is contained in:
@@ -523,12 +523,14 @@ int utf_ptr2cells(const char *p_in)
|
||||
}
|
||||
|
||||
/// Convert a UTF-8 byte sequence to a character number.
|
||||
/// Doesn't handle ascii! only multibyte and illegal sequences.
|
||||
/// Doesn't handle ascii! only multibyte and illegal sequences. ASCII (including NUL)
|
||||
/// are treated like illegal sequences.
|
||||
///
|
||||
/// @param[in] p String to convert.
|
||||
/// @param[in] len Length of the character in bytes, 0 or 1 if illegal.
|
||||
///
|
||||
/// @return Unicode codepoint. A negative value when the sequence is illegal.
|
||||
/// @return Unicode codepoint. A negative value when the sequence is illegal (or
|
||||
/// ASCII, including NUL).
|
||||
int32_t utf_ptr2CharInfo_impl(uint8_t const *p, uintptr_t const len)
|
||||
FUNC_ATTR_PURE FUNC_ATTR_NONNULL_ALL FUNC_ATTR_WARN_UNUSED_RESULT
|
||||
{
|
||||
@@ -1780,15 +1782,15 @@ int utf_head_off(const char *base_in, const char *p_in)
|
||||
start--;
|
||||
}
|
||||
|
||||
uint8_t cur_len = utf8len_tab[*start];
|
||||
int32_t cur_code = utf_ptr2CharInfo_impl(start, (uintptr_t)cur_len);
|
||||
if (cur_code < 0) {
|
||||
const uint8_t last_len = utf8len_tab[*start];
|
||||
int32_t cur_code = utf_ptr2CharInfo_impl(start, (uintptr_t)last_len);
|
||||
if (cur_code < 0 || p - start >= last_len) {
|
||||
return 0; // p must be part of an illegal sequence
|
||||
}
|
||||
const uint8_t * const safe_end = start + cur_len;
|
||||
const uint8_t * const safe_end = start + last_len;
|
||||
|
||||
int cur_bc = utf8proc_get_property(cur_code)->boundclass;
|
||||
if (always_break(cur_bc)) {
|
||||
if (always_break(cur_bc) || start == base) {
|
||||
return (int)(p - start);
|
||||
}
|
||||
|
||||
@@ -1796,18 +1798,23 @@ int utf_head_off(const char *base_in, const char *p_in)
|
||||
const uint8_t *cur_pos = start;
|
||||
const uint8_t *const p_start = start;
|
||||
|
||||
if (start == base) {
|
||||
return (int)(p - start);
|
||||
}
|
||||
while (true) {
|
||||
if (start[-1] == NUL) {
|
||||
break;
|
||||
}
|
||||
|
||||
start--;
|
||||
if (*start < 0x80) { // stop on ascii, we are done
|
||||
break;
|
||||
}
|
||||
|
||||
start--;
|
||||
while (*start >= 0x80) { // stop on ascii, we are done
|
||||
while (start > base && (*start & 0xc0) == 0x80 && (cur_pos - start) < 6) {
|
||||
start--;
|
||||
}
|
||||
|
||||
int32_t prev_code = utf_ptr2CharInfo_impl(start, (uintptr_t)utf8len_tab[*start]);
|
||||
if (prev_code < 0) {
|
||||
int prev_len = utf8len_tab[*start];
|
||||
int32_t prev_code = utf_ptr2CharInfo_impl(start, (uintptr_t)prev_len);
|
||||
if (prev_code < 0 || prev_len < cur_pos - start) {
|
||||
start = cur_pos; // start at valid sequence after invalid bytes
|
||||
break;
|
||||
}
|
||||
@@ -1822,12 +1829,10 @@ int utf_head_off(const char *base_in, const char *p_in)
|
||||
cur_pos = start;
|
||||
cur_bc = prev_bc;
|
||||
cur_code = prev_code;
|
||||
|
||||
start--;
|
||||
}
|
||||
|
||||
// hot path: we are already on the first codepoint of a sequence
|
||||
if (start == p_start) {
|
||||
if (start == p_start && last_len > p - start) {
|
||||
return (int)(p - start);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user