vim-patch:7.4.1604

Problem:    Although emoji characters have ambiguous width, it is best to treat
            them as full width.
Solution:   Update the Unicode character tables. Add the 'emoji' option.
            (Yasuhiro Matsumoto)

3848e00e01
This commit is contained in:
James McCoy
2016-09-21 10:15:19 -04:00
parent 68bcb32ec4
commit d533edf61e
10 changed files with 80 additions and 15 deletions

View File

@@ -1,4 +1,4 @@
*options.txt* For Vim version 7.4. Last change: 2016 Apr 12 *options.txt* For Vim version 7.4. Last change: 2016 Mar 19
VIM REFERENCE MANUAL by Bram Moolenaar VIM REFERENCE MANUAL by Bram Moolenaar
@@ -2123,6 +2123,15 @@ A jump table for the options with a short description can be found at |Q_op|.
hor horizontally, height of windows is not affected hor horizontally, height of windows is not affected
both width and height of windows is affected both width and height of windows is affected
*'emoji'* *'emo'*
'emoji' 'emo' boolean (default: on)
global
{not in Vi}
{only available when compiled with the |+multi_byte|
feature}
When on, all Unicode emoji characters are considered to be full width.
*'encoding'* *'enc'* *E543* *'encoding'* *'enc'* *E543*
'encoding' 'enc' string (default: "utf-8") 'encoding' 'enc' string (default: "utf-8")
global global

View File

@@ -1195,6 +1195,8 @@ if has("multi_byte")
endif endif
call append("$", "ambiwidth\twidth of ambiguous width characters") call append("$", "ambiwidth\twidth of ambiguous width characters")
call <SID>OptionG("ambw", &ambw) call <SID>OptionG("ambw", &ambw)
call append("$", "emoji\temoji characters are full width")
call <SID>BinOptionG("emo", &emo)
endif endif

View File

@@ -1,11 +1,11 @@
#!/bin/sh #!/bin/sh
set -e set -e
data_files="UnicodeData.txt CaseFolding.txt EastAsianWidth.txt"
files="UnicodeData.txt CaseFolding.txt EastAsianWidth.txt" emoji_files="emoji-data.txt"
UNIDIR_DEFAULT=unicode UNIDIR_DEFAULT=unicode
DOWNLOAD_URL_BASE_DEFAULT='http://unicode.org/Public/UNIDATA' DOWNLOAD_URL_BASE_DEFAULT='http://unicode.org/Public'
if test x$1 = 'x--help' ; then if test x$1 = 'x--help' ; then
echo 'Usage:' echo 'Usage:'
@@ -21,8 +21,16 @@ fi
UNIDIR=${1:-$UNIDIR_DEFAULT} UNIDIR=${1:-$UNIDIR_DEFAULT}
DOWNLOAD_URL_BASE=${2:-$DOWNLOAD_URL_BASE_DEFAULT} DOWNLOAD_URL_BASE=${2:-$DOWNLOAD_URL_BASE_DEFAULT}
for filename in $files ; do for filename in $data_files ; do
curl -o "$UNIDIR/$filename" "$DOWNLOAD_URL_BASE/$filename" curl -o "$UNIDIR/$filename" "$DOWNLOAD_URL_BASE/UNIDATA/$filename"
(
cd "$UNIDIR"
git add $filename
)
done
for filename in $emoji_files ; do
curl -o "$UNIDIR/$filename" "$DOWNLOAD_URL_BASE/emoji/3.0/$filename"
( (
cd "$UNIDIR" cd "$UNIDIR"
git add $filename git add $filename

View File

@@ -12,10 +12,12 @@
-- 2 then interval applies only to first, third, fifth, … character in range. -- 2 then interval applies only to first, third, fifth, … character in range.
-- Fourth value is number that should be added to the codepoint to yield -- Fourth value is number that should be added to the codepoint to yield
-- folded/lower/upper codepoint. -- folded/lower/upper codepoint.
-- 4. emoji table: sorted list of non-overlapping closed intervals of Emoji
-- characters
if arg[1] == '--help' then if arg[1] == '--help' then
print('Usage:') print('Usage:')
print(' genunicodetables.lua UnicodeData.txt CaseFolding.txt ' .. print(' genunicodetables.lua UnicodeData.txt CaseFolding.txt ' ..
'EastAsianWidth.txt') 'EastAsianWidth.txt emoji-data.txt')
print(' unicode_tables.generated.h') print(' unicode_tables.generated.h')
os.exit(0) os.exit(0)
end end
@@ -23,8 +25,9 @@ end
local unicodedata_fname = arg[1] local unicodedata_fname = arg[1]
local casefolding_fname = arg[2] local casefolding_fname = arg[2]
local eastasianwidth_fname = arg[3] local eastasianwidth_fname = arg[3]
local emoji_fname = arg[4]
local utf_tables_fname = arg[4] local utf_tables_fname = arg[5]
local split_on_semicolons = function(s) local split_on_semicolons = function(s)
local ret = {} local ret = {}
@@ -79,6 +82,10 @@ local parse_width_props = function(eaw_fp)
return fp_lines_to_lists(eaw_fp, 2, true) return fp_lines_to_lists(eaw_fp, 2, true)
end end
-- Parse the already-opened emoji data file handle `emoji_fp`.
-- Delegates to fp_lines_to_lists with the arguments (2, true), the same
-- arguments parse_width_props passes for the East Asian width file, and
-- returns whatever that helper returns.
local function parse_emoji_props(emoji_fp)
  return fp_lines_to_lists(emoji_fp, 2, true)
end
local make_range = function(start, end_, step, add) local make_range = function(start, end_, step, add)
if step and add then if step and add then
return (' {0x%x, 0x%x, %d, %d},\n'):format( return (' {0x%x, 0x%x, %d, %d},\n'):format(
@@ -213,6 +220,24 @@ local build_width_table = function(ut_fp, dataprops, widthprops, widths,
ut_fp:write('};\n') ut_fp:write('};\n')
end end
-- Write the C definition of the `emoji` interval array to `ut_fp`.
--
-- ut_fp:      output handle; only its write method is used.
-- emojiprops: list of entries in which [1] holds either one hexadecimal
--             codepoint or a range written as "FIRST..LAST", and [2] holds
--             the property text of that entry.
--
-- Only entries whose property text matches 'Emoji%s+#' (the word "Emoji"
-- followed by whitespace and then '#') are emitted. Entries are written in
-- the order they appear in `emojiprops`; a single codepoint is written as a
-- range whose start and end are equal.
local function build_emoji_table(ut_fp, emojiprops)
  ut_fp:write('static const struct interval emoji[] = {\n')
  for _, prop in ipairs(emojiprops) do
    local codepoints, property = prop[1], prop[2]
    if property:match('Emoji%s+#') then
      -- Locate the ".." separator to tell a range from a single codepoint.
      local dots_first, dots_last = codepoints:find('%.%.')
      local first, last
      if dots_first then
        first = tonumber(codepoints:sub(1, dots_first - 1), 16)
        last = tonumber(codepoints:sub(dots_last + 1), 16)
      else
        first = tonumber(codepoints, 16)
        last = first
      end
      ut_fp:write(make_range(first, last))
    end
  end
  ut_fp:write('};\n')
end
local ud_fp = io.open(unicodedata_fname, 'r') local ud_fp = io.open(unicodedata_fname, 'r')
local dataprops = parse_data_to_props(ud_fp) local dataprops = parse_data_to_props(ud_fp)
ud_fp:close() ud_fp:close()
@@ -236,4 +261,10 @@ eaw_fp:close()
build_width_table(ut_fp, dataprops, widthprops, {W=true, F=true}, 'doublewidth') build_width_table(ut_fp, dataprops, widthprops, {W=true, F=true}, 'doublewidth')
build_width_table(ut_fp, dataprops, widthprops, {A=true}, 'ambiguous') build_width_table(ut_fp, dataprops, widthprops, {A=true}, 'ambiguous')
local emoji_fp = io.open(emoji_fname, 'r')
local emojiprops = parse_emoji_props(emoji_fp)
emoji_fp:close()
build_emoji_table(ut_fp, emojiprops)
ut_fp:close() ut_fp:close()

View File

@@ -39,6 +39,7 @@ set(UNICODE_DIR ${PROJECT_SOURCE_DIR}/unicode)
set(UNICODEDATA_FILE ${UNICODE_DIR}/UnicodeData.txt) set(UNICODEDATA_FILE ${UNICODE_DIR}/UnicodeData.txt)
set(CASEFOLDING_FILE ${UNICODE_DIR}/CaseFolding.txt) set(CASEFOLDING_FILE ${UNICODE_DIR}/CaseFolding.txt)
set(EASTASIANWIDTH_FILE ${UNICODE_DIR}/EastAsianWidth.txt) set(EASTASIANWIDTH_FILE ${UNICODE_DIR}/EastAsianWidth.txt)
set(EMOJI_FILE ${UNICODE_DIR}/emoji-data.txt)
set(GENERATED_UNICODE_TABLES ${GENERATED_DIR}/unicode_tables.generated.h) set(GENERATED_UNICODE_TABLES ${GENERATED_DIR}/unicode_tables.generated.h)
include_directories(${GENERATED_DIR}) include_directories(${GENERATED_DIR})
@@ -195,12 +196,14 @@ add_custom_command(OUTPUT ${GENERATED_UNICODE_TABLES}
${UNICODEDATA_FILE} ${UNICODEDATA_FILE}
${CASEFOLDING_FILE} ${CASEFOLDING_FILE}
${EASTASIANWIDTH_FILE} ${EASTASIANWIDTH_FILE}
${EMOJI_FILE}
${GENERATED_UNICODE_TABLES} ${GENERATED_UNICODE_TABLES}
DEPENDS DEPENDS
${UNICODE_TABLES_GENERATOR} ${UNICODE_TABLES_GENERATOR}
${UNICODEDATA_FILE} ${UNICODEDATA_FILE}
${CASEFOLDING_FILE} ${CASEFOLDING_FILE}
${EASTASIANWIDTH_FILE} ${EASTASIANWIDTH_FILE}
${EMOJI_FILE}
) )
add_custom_command(OUTPUT ${GENERATED_API_DISPATCH} ${API_METADATA} add_custom_command(OUTPUT ${GENERATED_API_DISPATCH} ${API_METADATA}

View File

@@ -949,6 +949,9 @@ int utf_char2cells(int c)
if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) if (intable(doublewidth, ARRAY_SIZE(doublewidth), c))
return 2; return 2;
#endif #endif
if (p_emoji && intable(emoji, ARRAY_SIZE(emoji), c)) {
return 2;
}
} }
/* Characters below 0x100 are influenced by 'isprint' option */ /* Characters below 0x100 are influenced by 'isprint' option */
else if (c >= 0x80 && !vim_isprintc(c)) else if (c >= 0x80 && !vim_isprintc(c))

View File

@@ -2520,7 +2520,7 @@ did_set_string_option (
errmsg = e_invarg; errmsg = e_invarg;
} }
/* 'ambiwidth' */ /* 'ambiwidth' */
else if (varp == &p_ambw) { else if (varp == &p_ambw || (bool *)varp == &p_emoji) {
if (check_opt_strings(p_ambw, p_ambw_values, FALSE) != OK) if (check_opt_strings(p_ambw, p_ambw_values, FALSE) != OK)
errmsg = e_invarg; errmsg = e_invarg;
else if (set_chars_option(&p_lcs) != NULL) else if (set_chars_option(&p_lcs) != NULL)

View File

@@ -402,6 +402,7 @@ static char *(p_dy_values[]) = {"lastline", "uhex", NULL};
#define DY_LASTLINE 0x001 #define DY_LASTLINE 0x001
#define DY_UHEX 0x002 #define DY_UHEX 0x002
EXTERN int p_ed; /* 'edcompatible' */ EXTERN int p_ed; /* 'edcompatible' */
EXTERN bool p_emoji; // 'emoji'
EXTERN char_u *p_ead; /* 'eadirection' */ EXTERN char_u *p_ead; /* 'eadirection' */
EXTERN bool p_ea; /* 'equalalways' */ EXTERN bool p_ea; /* 'equalalways' */
EXTERN char_u *p_ep; /* 'equalprg' */ EXTERN char_u *p_ep; /* 'equalprg' */

View File

@@ -643,6 +643,14 @@ return {
varname='p_force_off', varname='p_force_off',
defaults={if_true={vi=false}} defaults={if_true={vi=false}}
}, },
{
full_name='emoji', abbreviation='emo',
type='bool', scope={'global'},
vi_def=true,
redraw={'everything'},
varname='p_emoji',
defaults={if_true={vi=true}}
},
{ {
full_name='encoding', abbreviation='enc', full_name='encoding', abbreviation='enc',
type='string', scope={'global'}, type='string', scope={'global'},

View File

@@ -839,7 +839,7 @@ static int included_patches[] = {
// 1607, // 1607,
// 1606, // 1606,
// 1605, // 1605,
// 1604, 1604,
1603, 1603,
// 1602 NA // 1602 NA
// 1601 NA // 1601 NA