feat(mbyte): support extended grapheme clusters including more emoji

Use the grapheme break algorithm from utf8proc to support grapheme
clusters from recent unicode versions.

Handle variant selector VS16 turning some codepoints into double-width
emoji. This means we need to use ptr2cells rather than char2cells when
possible.
This commit is contained in:
bfredl
2024-08-08 10:42:08 +02:00
parent 4353996d0f
commit cfdf68a7ac
34 changed files with 657 additions and 221 deletions

View File

@@ -3663,7 +3663,7 @@ func Test_string_reverse()
call assert_equal('', reverse(v:_null_string))
for [s1, s2] in [['', ''], ['a', 'a'], ['ab', 'ba'], ['abc', 'cba'],
\ ['abcd', 'dcba'], ['«-«-»-»', '»-»-«-«'],
\ ['🇦', '🇦'], ['🇦🇧', '🇧🇦'], ['🇦🇧🇨', '🇨🇧🇦'],
\ ['🇦', '🇦'], ['🇦🇧', '🇦🇧'], ['🇦🇧🇨', '🇨🇦🇧'],
\ ['🇦«🇧-🇨»🇩', '🇩»🇨-🇧«🇦']]
call assert_equal(s2, reverse(s1))
endfor

View File

@@ -3897,9 +3897,9 @@ func Test_normal_count_after_operator()
bw!
endfunc
func Test_normal_gj_on_extra_wide_char()
func Test_normal_gj_on_6_cell_wide_unprintable_char()
new | 25vsp
let text='1 foooooooo ar e inszwe1 foooooooo inszwei' .
let text='1 foooooooo ar e inszwe1 foooooooo inszwei' .
\ ' i drei vier fünf sechs sieben acht un zehn elf zwöfl' .
\ ' dreizehn v ierzehn fünfzehn'
put =text