vim-patch:7.4.1604

Problem:    Although emoji characters have ambiguous width, it is best to
            treat them as full width.
Solution:   Update the Unicode character tables. Add the 'emoji' option.
            (Yasuhiro Matsumoto)

3848e00e01
This commit is contained in:
James McCoy
2016-09-21 10:15:19 -04:00
parent 68bcb32ec4
commit d533edf61e
10 changed files with 80 additions and 15 deletions

View File

@@ -1,4 +1,4 @@
*options.txt* For Vim version 7.4. Last change: 2016 Apr 12 *options.txt* For Vim version 7.4. Last change: 2016 Mar 19
VIM REFERENCE MANUAL by Bram Moolenaar VIM REFERENCE MANUAL by Bram Moolenaar
@@ -899,7 +899,7 @@ A jump table for the options with a short description can be found at |Q_op|.
- The backup file will be created in the first directory in the list - The backup file will be created in the first directory in the list
where this is possible. The directory must exist, Vim will not where this is possible. The directory must exist, Vim will not
create it for you. create it for you.
- Empty means that no backup file will be created ( 'patchmode' is - Empty means that no backup file will be created ('patchmode' is
impossible!). Writing may fail because of this. impossible!). Writing may fail because of this.
- A directory "." means to put the backup file in the same directory - A directory "." means to put the backup file in the same directory
as the edited file. as the edited file.
@@ -1456,7 +1456,7 @@ A jump table for the options with a short description can be found at |Q_op|.
when CTRL-P or CTRL-N are used. It is also used for whole-line when CTRL-P or CTRL-N are used. It is also used for whole-line
completion |i_CTRL-X_CTRL-L|. It indicates the type of completion completion |i_CTRL-X_CTRL-L|. It indicates the type of completion
and the places to scan. It is a comma separated list of flags: and the places to scan. It is a comma separated list of flags:
. scan the current buffer ( 'wrapscan' is ignored) . scan the current buffer ('wrapscan' is ignored)
w scan buffers from other windows w scan buffers from other windows
b scan other loaded buffers that are in the buffer list b scan other loaded buffers that are in the buffer list
u scan the unloaded buffers that are in the buffer list u scan the unloaded buffers that are in the buffer list
@@ -2123,6 +2123,15 @@ A jump table for the options with a short description can be found at |Q_op|.
hor horizontally, height of windows is not affected hor horizontally, height of windows is not affected
both width and height of windows is affected both width and height of windows is affected
*'emoji'* *'emo'*
'emoji' 'emo' boolean (default: on)
global
{not in Vi}
{only available when compiled with the |+multi_byte|
feature}
When on, all Unicode emoji characters are considered to be full width.
*'encoding'* *'enc'* *E543* *'encoding'* *'enc'* *E543*
'encoding' 'enc' string (default: "utf-8") 'encoding' 'enc' string (default: "utf-8")
global global
@@ -3082,7 +3091,7 @@ A jump table for the options with a short description can be found at |Q_op|.
The same applies to the modeless selection. The same applies to the modeless selection.
*'go-P'* *'go-P'*
'P' Like autoselect but using the "+ register instead of the "* 'P' Like autoselect but using the "+ register instead of the "*
register. register.
*'go-A'* *'go-A'*
'A' Autoselect for the modeless selection. Like 'a', but only 'A' Autoselect for the modeless selection. Like 'a', but only
applies to the modeless selection. applies to the modeless selection.
@@ -4378,7 +4387,7 @@ A jump table for the options with a short description can be found at |Q_op|.
respectively; see |CTRL-A| for more info on these commands. respectively; see |CTRL-A| for more info on these commands.
alpha If included, single alphabetical characters will be alpha If included, single alphabetical characters will be
incremented or decremented. This is useful for a list with a incremented or decremented. This is useful for a list with a
letter index a), b), etc. *octal-nrformats* letter index a), b), etc. *octal-nrformats*
octal If included, numbers that start with a zero will be considered octal If included, numbers that start with a zero will be considered
to be octal. Example: Using CTRL-A on "007" results in "010". to be octal. Example: Using CTRL-A on "007" results in "010".
hex If included, numbers starting with "0x" or "0X" will be hex If included, numbers starting with "0x" or "0X" will be
@@ -4408,7 +4417,7 @@ A jump table for the options with a short description can be found at |Q_op|.
relative to the cursor. Together with 'number' there are these relative to the cursor. Together with 'number' there are these
four combinations (cursor in line 3): four combinations (cursor in line 3):
'nonu' 'nu' 'nonu' 'nu' 'nonu' 'nu' 'nonu' 'nu'
'nornu' 'nornu' 'rnu' 'rnu' 'nornu' 'nornu' 'rnu' 'rnu'
|apple | 1 apple | 2 apple | 2 apple |apple | 1 apple | 2 apple | 2 apple

View File

@@ -1195,6 +1195,8 @@ if has("multi_byte")
endif endif
call append("$", "ambiwidth\twidth of ambiguous width characters") call append("$", "ambiwidth\twidth of ambiguous width characters")
call <SID>OptionG("ambw", &ambw) call <SID>OptionG("ambw", &ambw)
call append("$", "emoji\temoji characters are full width")
call <SID>BinOptionG("emo", &emo)
endif endif

View File

@@ -1,11 +1,11 @@
#!/bin/sh #!/bin/sh
set -e set -e
data_files="UnicodeData.txt CaseFolding.txt EastAsianWidth.txt"
files="UnicodeData.txt CaseFolding.txt EastAsianWidth.txt" emoji_files="emoji-data.txt"
UNIDIR_DEFAULT=unicode UNIDIR_DEFAULT=unicode
DOWNLOAD_URL_BASE_DEFAULT='http://unicode.org/Public/UNIDATA' DOWNLOAD_URL_BASE_DEFAULT='http://unicode.org/Public'
if test x$1 = 'x--help' ; then if test x$1 = 'x--help' ; then
echo 'Usage:' echo 'Usage:'
@@ -21,8 +21,16 @@ fi
UNIDIR=${1:-$UNIDIR_DEFAULT} UNIDIR=${1:-$UNIDIR_DEFAULT}
DOWNLOAD_URL_BASE=${2:-$DOWNLOAD_URL_BASE_DEFAULT} DOWNLOAD_URL_BASE=${2:-$DOWNLOAD_URL_BASE_DEFAULT}
for filename in $files ; do for filename in $data_files ; do
curl -o "$UNIDIR/$filename" "$DOWNLOAD_URL_BASE/$filename" curl -o "$UNIDIR/$filename" "$DOWNLOAD_URL_BASE/UNIDATA/$filename"
(
cd "$UNIDIR"
git add $filename
)
done
for filename in $emoji_files ; do
curl -o "$UNIDIR/$filename" "$DOWNLOAD_URL_BASE/emoji/3.0/$filename"
( (
cd "$UNIDIR" cd "$UNIDIR"
git add $filename git add $filename

View File

@@ -12,10 +12,12 @@
-- 2 then interval applies only to first, third, fifth, … character in range. -- 2 then interval applies only to first, third, fifth, … character in range.
-- Fourth value is number that should be added to the codepoint to yield -- Fourth value is number that should be added to the codepoint to yield
-- folded/lower/upper codepoint. -- folded/lower/upper codepoint.
-- 4. emoji table: sorted list of non-overlapping closed intervals of Emoji
-- characters
if arg[1] == '--help' then if arg[1] == '--help' then
print('Usage:') print('Usage:')
print(' genunicodetables.lua UnicodeData.txt CaseFolding.txt ' .. print(' genunicodetables.lua UnicodeData.txt CaseFolding.txt ' ..
'EastAsianWidth.txt') 'EastAsianWidth.txt emoji-data.txt')
print(' unicode_tables.generated.h') print(' unicode_tables.generated.h')
os.exit(0) os.exit(0)
end end
@@ -23,8 +25,9 @@ end
local unicodedata_fname = arg[1] local unicodedata_fname = arg[1]
local casefolding_fname = arg[2] local casefolding_fname = arg[2]
local eastasianwidth_fname = arg[3] local eastasianwidth_fname = arg[3]
local emoji_fname = arg[4]
local utf_tables_fname = arg[4] local utf_tables_fname = arg[5]
local split_on_semicolons = function(s) local split_on_semicolons = function(s)
local ret = {} local ret = {}
@@ -79,6 +82,10 @@ local parse_width_props = function(eaw_fp)
return fp_lines_to_lists(eaw_fp, 2, true) return fp_lines_to_lists(eaw_fp, 2, true)
end end
-- Parses the emoji data file read through the open file handle `emoji_fp`
-- and returns whatever fp_lines_to_lists() produces for it.
-- The arguments (2, true) are the same ones parse_width_props passes for the
-- East Asian width data; presumably they request two semicolon-separated
-- fields per line -- TODO confirm against fp_lines_to_lists.
local parse_emoji_props = function(emoji_fp)
return fp_lines_to_lists(emoji_fp, 2, true)
end
local make_range = function(start, end_, step, add) local make_range = function(start, end_, step, add)
if step and add then if step and add then
return (' {0x%x, 0x%x, %d, %d},\n'):format( return (' {0x%x, 0x%x, %d, %d},\n'):format(
@@ -213,6 +220,24 @@ local build_width_table = function(ut_fp, dataprops, widthprops, widths,
ut_fp:write('};\n') ut_fp:write('};\n')
end end
-- Writes the C array `emoji` (a list of `struct interval` entries) to the
-- output file handle `ut_fp`.
--
-- `emojiprops` is a list of entries where entry[1] is either a single
-- hexadecimal codepoint or a "START..END" hexadecimal range, and entry[2] is
-- the property text. Only entries whose property text matches
-- 'Emoji%s+#' are emitted; each becomes one closed interval, in the order in
-- which the entries appear in `emojiprops`.
local build_emoji_table = function(ut_fp, emojiprops)
  ut_fp:write('static const struct interval emoji[] = {\n')
  for _, prop in ipairs(emojiprops) do
    if prop[2]:match('Emoji%s+#') then
      local codepoints = prop[1]
      -- A literal ".." separates the two ends of a range.
      local dots_start, dots_end = codepoints:find('%.%.')
      local first, last
      if dots_start then
        first = tonumber(codepoints:sub(1, dots_start - 1), 16)
        last = tonumber(codepoints:sub(dots_end + 1), 16)
      else
        -- Single codepoint: emit it as a one-element interval.
        first = tonumber(codepoints, 16)
        last = first
      end
      ut_fp:write(make_range(first, last))
    end
  end
  ut_fp:write('};\n')
end
local ud_fp = io.open(unicodedata_fname, 'r') local ud_fp = io.open(unicodedata_fname, 'r')
local dataprops = parse_data_to_props(ud_fp) local dataprops = parse_data_to_props(ud_fp)
ud_fp:close() ud_fp:close()
@@ -236,4 +261,10 @@ eaw_fp:close()
build_width_table(ut_fp, dataprops, widthprops, {W=true, F=true}, 'doublewidth') build_width_table(ut_fp, dataprops, widthprops, {W=true, F=true}, 'doublewidth')
build_width_table(ut_fp, dataprops, widthprops, {A=true}, 'ambiguous') build_width_table(ut_fp, dataprops, widthprops, {A=true}, 'ambiguous')
local emoji_fp = io.open(emoji_fname, 'r')
local emojiprops = parse_emoji_props(emoji_fp)
emoji_fp:close()
build_emoji_table(ut_fp, emojiprops)
ut_fp:close() ut_fp:close()

View File

@@ -39,6 +39,7 @@ set(UNICODE_DIR ${PROJECT_SOURCE_DIR}/unicode)
set(UNICODEDATA_FILE ${UNICODE_DIR}/UnicodeData.txt) set(UNICODEDATA_FILE ${UNICODE_DIR}/UnicodeData.txt)
set(CASEFOLDING_FILE ${UNICODE_DIR}/CaseFolding.txt) set(CASEFOLDING_FILE ${UNICODE_DIR}/CaseFolding.txt)
set(EASTASIANWIDTH_FILE ${UNICODE_DIR}/EastAsianWidth.txt) set(EASTASIANWIDTH_FILE ${UNICODE_DIR}/EastAsianWidth.txt)
set(EMOJI_FILE ${UNICODE_DIR}/emoji-data.txt)
set(GENERATED_UNICODE_TABLES ${GENERATED_DIR}/unicode_tables.generated.h) set(GENERATED_UNICODE_TABLES ${GENERATED_DIR}/unicode_tables.generated.h)
include_directories(${GENERATED_DIR}) include_directories(${GENERATED_DIR})
@@ -195,12 +196,14 @@ add_custom_command(OUTPUT ${GENERATED_UNICODE_TABLES}
${UNICODEDATA_FILE} ${UNICODEDATA_FILE}
${CASEFOLDING_FILE} ${CASEFOLDING_FILE}
${EASTASIANWIDTH_FILE} ${EASTASIANWIDTH_FILE}
${EMOJI_FILE}
${GENERATED_UNICODE_TABLES} ${GENERATED_UNICODE_TABLES}
DEPENDS DEPENDS
${UNICODE_TABLES_GENERATOR} ${UNICODE_TABLES_GENERATOR}
${UNICODEDATA_FILE} ${UNICODEDATA_FILE}
${CASEFOLDING_FILE} ${CASEFOLDING_FILE}
${EASTASIANWIDTH_FILE} ${EASTASIANWIDTH_FILE}
${EMOJI_FILE}
) )
add_custom_command(OUTPUT ${GENERATED_API_DISPATCH} ${API_METADATA} add_custom_command(OUTPUT ${GENERATED_API_DISPATCH} ${API_METADATA}

View File

@@ -949,6 +949,9 @@ int utf_char2cells(int c)
if (intable(doublewidth, ARRAY_SIZE(doublewidth), c)) if (intable(doublewidth, ARRAY_SIZE(doublewidth), c))
return 2; return 2;
#endif #endif
if (p_emoji && intable(emoji, ARRAY_SIZE(emoji), c)) {
return 2;
}
} }
/* Characters below 0x100 are influenced by 'isprint' option */ /* Characters below 0x100 are influenced by 'isprint' option */
else if (c >= 0x80 && !vim_isprintc(c)) else if (c >= 0x80 && !vim_isprintc(c))

View File

@@ -2520,7 +2520,7 @@ did_set_string_option (
errmsg = e_invarg; errmsg = e_invarg;
} }
/* 'ambiwidth' */ /* 'ambiwidth' */
else if (varp == &p_ambw) { else if (varp == &p_ambw || (bool *)varp == &p_emoji) {
if (check_opt_strings(p_ambw, p_ambw_values, FALSE) != OK) if (check_opt_strings(p_ambw, p_ambw_values, FALSE) != OK)
errmsg = e_invarg; errmsg = e_invarg;
else if (set_chars_option(&p_lcs) != NULL) else if (set_chars_option(&p_lcs) != NULL)

View File

@@ -402,6 +402,7 @@ static char *(p_dy_values[]) = {"lastline", "uhex", NULL};
#define DY_LASTLINE 0x001 #define DY_LASTLINE 0x001
#define DY_UHEX 0x002 #define DY_UHEX 0x002
EXTERN int p_ed; /* 'edcompatible' */ EXTERN int p_ed; /* 'edcompatible' */
EXTERN bool p_emoji; // 'emoji'
EXTERN char_u *p_ead; /* 'eadirection' */ EXTERN char_u *p_ead; /* 'eadirection' */
EXTERN bool p_ea; /* 'equalalways' */ EXTERN bool p_ea; /* 'equalalways' */
EXTERN char_u *p_ep; /* 'equalprg' */ EXTERN char_u *p_ep; /* 'equalprg' */

View File

@@ -643,6 +643,14 @@ return {
varname='p_force_off', varname='p_force_off',
defaults={if_true={vi=false}} defaults={if_true={vi=false}}
}, },
{
-- 'emoji' ('emo'): global boolean option stored in the C variable p_emoji;
-- its default value is true.
full_name='emoji', abbreviation='emo',
type='bool', scope={'global'},
vi_def=true,
redraw={'everything'},
varname='p_emoji',
defaults={if_true={vi=true}}
},
{ {
full_name='encoding', abbreviation='enc', full_name='encoding', abbreviation='enc',
type='string', scope={'global'}, type='string', scope={'global'},

View File

@@ -839,7 +839,7 @@ static int included_patches[] = {
// 1607, // 1607,
// 1606, // 1606,
// 1605, // 1605,
// 1604, 1604,
1603, 1603,
// 1602 NA // 1602 NA
// 1601 NA // 1601 NA