Merge pull request #3655 from bfredl/enc_default

Default to encoding=utf-8
This commit is contained in:
Justin M. Keyes
2016-01-02 18:05:52 -05:00
7 changed files with 38 additions and 116 deletions

View File

@@ -2130,7 +2130,7 @@ A jump table for the options with a short description can be found at |Q_op|.
'edcompatible' 'ed' Removed. |vim-differences| {Nvim} 'edcompatible' 'ed' Removed. |vim-differences| {Nvim}
*'encoding'* *'enc'* *E543* *'encoding'* *'enc'* *E543*
'encoding' 'enc' string (default: "utf-8" or value from $LANG) 'encoding' 'enc' string (default: "utf-8")
global global
{only available when compiled with the |+multi_byte| {only available when compiled with the |+multi_byte|
feature} feature}
@@ -2152,10 +2152,6 @@ A jump table for the options with a short description can be found at |Q_op|.
can use: > can use: >
if has("multi_byte_encoding") if has("multi_byte_encoding")
< <
Normally 'encoding' will be equal to your current locale. This will
be the default if Vim recognizes your environment settings, otherwise
"utf-8" is used.
When you set this option, it fires the |EncodingChanged| autocommand When you set this option, it fires the |EncodingChanged| autocommand
event so that you can set up fonts if necessary. event so that you can set up fonts if necessary.
@@ -2172,9 +2168,6 @@ A jump table for the options with a short description can be found at |Q_op|.
setting 'encoding' to one of these values instead of utf-8 only has setting 'encoding' to one of these values instead of utf-8 only has
effect for encoding used for files when 'fileencoding' is empty. effect for encoding used for files when 'fileencoding' is empty.
When 'encoding' is set to a Unicode encoding, and 'fileencodings' was
not set yet, the default for 'fileencodings' is changed.
*'endofline'* *'eol'* *'noendofline'* *'noeol'* *'endofline'* *'eol'* *'noendofline'* *'noeol'*
'endofline' 'eol' boolean (default on) 'endofline' 'eol' boolean (default on)
local to buffer local to buffer
@@ -2345,9 +2338,7 @@ A jump table for the options with a short description can be found at |Q_op|.
old short name was 'fe', which is no longer used. old short name was 'fe', which is no longer used.
*'fileencodings'* *'fencs'* *'fileencodings'* *'fencs'*
'fileencodings' 'fencs' string (default: "ucs-bom", 'fileencodings' 'fencs' string (default: "ucs-bom,utf-8,default,latin1")
"ucs-bom,utf-8,default,latin1" when
'encoding' is set to a Unicode value)
global global
{only available when compiled with the |+multi_byte| {only available when compiled with the |+multi_byte|
feature} feature}
@@ -2387,9 +2378,8 @@ A jump table for the options with a short description can be found at |Q_op|.
because Vim cannot detect an error, thus the encoding is always because Vim cannot detect an error, thus the encoding is always
accepted. accepted.
The special value "default" can be used for the encoding from the The special value "default" can be used for the encoding from the
environment. This is the default value for 'encoding'. It is useful environment. It is useful when 'encoding' is set to "utf-8" and
when 'encoding' is set to "utf-8" and your environment uses a your environment uses a non-latin1 encoding, such as Russian.
non-latin1 encoding, such as Russian.
When 'encoding' is "utf-8" and a file contains an illegal byte When 'encoding' is "utf-8" and a file contains an illegal byte
sequence it won't be recognized as UTF-8. You can use the |8g8| sequence it won't be recognized as UTF-8. You can use the |8g8|
command to find the illegal byte sequence. command to find the illegal byte sequence.
@@ -3776,9 +3766,7 @@ A jump table for the options with a short description can be found at |Q_op|.
change 'iskeyword' instead. change 'iskeyword' instead.
*'iskeyword'* *'isk'* *'iskeyword'* *'isk'*
'iskeyword' 'isk' string (Vim default for 'iskeyword' 'isk' string (default: @,48-57,_,192-255
Win32: @,48-57,_,128-167,224-235
otherwise: @,48-57,_,192-255
Vi default: @,48-57,_) Vi default: @,48-57,_)
local to buffer local to buffer
Keywords are used in searching and recognizing with many commands: Keywords are used in searching and recognizing with many commands:
@@ -3791,8 +3779,7 @@ A jump table for the options with a short description can be found at |Q_op|.
When the 'lisp' option is on the '-' character is always included. When the 'lisp' option is on the '-' character is always included.
*'isprint'* *'isp'* *'isprint'* *'isp'*
'isprint' 'isp' string (default for MS-DOS, Win32, and Macintosh: 'isprint' 'isp' string (default: "@,161-255")
"@,~-255"; otherwise: "@,161-255")
global global
The characters given by this option are displayed directly on the The characters given by this option are displayed directly on the
screen. It is also used for "\p" in a |pattern|. The characters from screen. It is also used for "\p" in a |pattern|. The characters from

View File

@@ -798,6 +798,8 @@ EXTERN bool enc_utf8 INIT(= false); /* UTF-8 encoded Unicode */
EXTERN int enc_latin1like INIT(= TRUE); /* 'encoding' is latin1 comp. */ EXTERN int enc_latin1like INIT(= TRUE); /* 'encoding' is latin1 comp. */
EXTERN int has_mbyte INIT(= 0); /* any multi-byte encoding */ EXTERN int has_mbyte INIT(= 0); /* any multi-byte encoding */
/// Encoding used when 'fencs' is set to "default"
EXTERN char_u *fenc_default INIT(= NULL);
/* /*
* To speed up BYTELEN() we fill a table with the byte lengths whenever * To speed up BYTELEN() we fill a table with the byte lengths whenever

View File

@@ -568,11 +568,6 @@ char_u * mb_init(void)
/* When enc_utf8 is set or reset, (de)allocate ScreenLinesUC[] */ /* When enc_utf8 is set or reset, (de)allocate ScreenLinesUC[] */
screenalloc(false); screenalloc(false);
/* When using Unicode, set default for 'fileencodings'. */
if (enc_utf8 && !option_was_set((char_u *)"fencs"))
set_string_option_direct((char_u *)"fencs", -1,
(char_u *)"ucs-bom,utf-8,default,latin1", OPT_FREE, 0);
#ifdef HAVE_WORKING_LIBINTL #ifdef HAVE_WORKING_LIBINTL
/* GNU gettext 0.10.37 supports this feature: set the codeset used for /* GNU gettext 0.10.37 supports this feature: set the codeset used for
* translated messages independently from the current locale. */ * translated messages independently from the current locale. */
@@ -2417,11 +2412,8 @@ char_u *enc_canonize(char_u *enc) FUNC_ATTR_NONNULL_RET
int i; int i;
if (STRCMP(enc, "default") == 0) { if (STRCMP(enc, "default") == 0) {
/* Use the default encoding as it's found by set_init_1(). */ // Use the default encoding as found by set_init_1().
char_u *r = get_encoding_default(); return vim_strsave(fenc_default);
if (r == NULL)
r = (char_u *)"latin1";
return vim_strsave(r);
} }
/* copy "enc" to allocated memory, with room for two '-' */ /* copy "enc" to allocated memory, with room for two '-' */

View File

@@ -233,12 +233,6 @@ typedef struct vimoption {
#define P_CURSWANT 0x2000000U /* update curswant required; not needed when #define P_CURSWANT 0x2000000U /* update curswant required; not needed when
* there is a redraw flag */ * there is a redraw flag */
#define ISK_LATIN1 (char_u *)"@,48-57,_,192-255"
/* 'isprint' for latin1 is also used for MS-Windows cp1252, where 0x80 is used
* for the currency sign. */
# define ISP_LATIN1 (char_u *)"@,161-255"
#define HIGHLIGHT_INIT \ #define HIGHLIGHT_INIT \
"8:SpecialKey,~:EndOfBuffer,z:TermCursor,Z:TermCursorNC,@:NonText," \ "8:SpecialKey,~:EndOfBuffer,z:TermCursor,Z:TermCursorNC,@:NonText," \
"d:Directory,e:ErrorMsg,i:IncSearch,l:Search,m:MoreMsg,M:ModeMsg,n:LineNr," \ "d:Directory,e:ErrorMsg,i:IncSearch,l:Search,m:MoreMsg,M:ModeMsg,n:LineNr," \
@@ -776,59 +770,18 @@ void set_init_1(void)
/* Parse default for 'listchars'. */ /* Parse default for 'listchars'. */
(void)set_chars_option(&p_lcs); (void)set_chars_option(&p_lcs);
/* enc_locale() will try to find the encoding of the current locale. */ // enc_locale() will try to find the encoding of the current locale.
// This will be used when 'default' is used as encoding specifier
// in 'fileencodings'
char_u *p = enc_locale(); char_u *p = enc_locale();
if (p != NULL) { if (p == NULL) {
char_u *save_enc; // use utf-8 as 'default' if locale encoding can't be detected.
p = vim_strsave((char_u *)"utf-8");
/* Try setting 'encoding' and check if the value is valid.
* If not, go back to the default "utf-8". */
save_enc = p_enc;
p_enc = (char_u *) p;
if (STRCMP(p_enc, "gb18030") == 0) {
/* We don't support "gb18030", but "cp936" is a good substitute
* for practical purposes, thus use that. It's not an alias to
* still support conversion between gb18030 and utf-8. */
p_enc = vim_strsave((char_u *)"cp936");
xfree(p);
}
if (mb_init() == NULL) {
opt_idx = findoption((char_u *)"encoding");
if (opt_idx >= 0) {
options[opt_idx].def_val[VI_DEFAULT] = p_enc;
options[opt_idx].flags |= P_DEF_ALLOCED;
} }
fenc_default = p;
#if defined(MSWIN) || defined(MACOS) // Initialize multibyte (utf-8) handling
if (STRCMP(p_enc, "latin1") == 0
|| enc_utf8
) {
/* Adjust the default for 'isprint' and 'iskeyword' to match
* latin1. */
set_string_option_direct((char_u *)"isp", -1,
ISP_LATIN1, OPT_FREE, SID_NONE);
set_string_option_direct((char_u *)"isk", -1,
ISK_LATIN1, OPT_FREE, SID_NONE);
opt_idx = findoption((char_u *)"isp");
if (opt_idx >= 0)
options[opt_idx].def_val[VIM_DEFAULT] = ISP_LATIN1;
opt_idx = findoption((char_u *)"isk");
if (opt_idx >= 0)
options[opt_idx].def_val[VIM_DEFAULT] = ISK_LATIN1;
(void)init_chartab();
}
#endif
} else {
xfree(p_enc);
// mb_init() failed; fallback to utf8 and try again.
p_enc = save_enc;
mb_init(); mb_init();
}
} else {
// enc_locale() failed; initialize the default (utf8).
mb_init();
}
// Don't change &encoding when resetting to defaults with ":set all&". // Don't change &encoding when resetting to defaults with ":set all&".
opt_idx = findoption((char_u *)"encoding"); opt_idx = findoption((char_u *)"encoding");
@@ -4669,16 +4622,6 @@ char_u *get_highlight_default(void)
return (char_u *)NULL; return (char_u *)NULL;
} }
char_u *get_encoding_default(void)
{
int i;
i = findoption((char_u *)"enc");
if (i >= 0)
return options[i].def_val[VI_DEFAULT];
return (char_u *)NULL;
}
/* /*
* Translate a string like "t_xx", "<t_xx>" or "<S-Tab>" to a key number. * Translate a string like "t_xx", "<t_xx>" or "<S-Tab>" to a key number.
*/ */

View File

@@ -748,7 +748,7 @@ return {
type='string', list='comma', scope={'global'}, type='string', list='comma', scope={'global'},
vi_def=true, vi_def=true,
varname='p_fencs', varname='p_fencs',
defaults={if_true={vi="ucs-bom"}} defaults={if_true={vi="ucs-bom,utf-8,default,latin1"}}
}, },
{ {
full_name='fileformat', abbreviation='ff', full_name='fileformat', abbreviation='ff',
@@ -1285,7 +1285,7 @@ return {
vim=true, vim=true,
alloced=true, alloced=true,
varname='p_isk', varname='p_isk',
defaults={if_true={vi="@,48-57,_", vim=macros('ISK_LATIN1')}} defaults={if_true={vi="@,48-57,_", vim="@,48-57,_,192-255"}}
}, },
{ {
full_name='isprint', abbreviation='isp', full_name='isprint', abbreviation='isp',
@@ -1294,10 +1294,7 @@ return {
vi_def=true, vi_def=true,
redraw={'all_windows'}, redraw={'all_windows'},
varname='p_isp', varname='p_isp',
defaults={ defaults={if_true={vi="@,161-255"}
condition='MSWIN',
if_true={vi="@,~-255"},
if_false={vi=macros("ISP_LATIN1")}
} }
}, },
{ {

View File

@@ -21,20 +21,21 @@ describe('&encoding', function()
eq(3, eval('strwidth("Bär")')) eq(3, eval('strwidth("Bär")'))
end) end)
it('is not changed by `set all&`', function() it('can be changed before startup', function()
-- we need to set &encoding to something non-default clear('set enc=latin1')
-- use 'latin1' when enc&vi is 'utf-8', 'utf-8' otherwise execute('set encoding=utf-8')
execute('set fenc=default') -- error message expected
local enc_default, enc_other, width = eval('&fenc'), 'utf-8', 3 feed('<cr>')
if enc_default == 'utf-8' then eq('latin1', eval('&encoding'))
enc_other = 'latin1' eq(4, eval('strwidth("Bär")'))
width = 4 -- utf-8 string 'Bär' will count as 4 latin1 chars end)
end
clear('set enc=' .. enc_other) it('is not changed by `set all&`', function()
-- we need to set &encoding to something non-default. Use 'latin1'
clear('set enc=latin1')
execute('set all&') execute('set all&')
eq(enc_other, eval('&encoding')) eq('latin1', eval('&encoding'))
eq(width, eval('strwidth("Bär")')) eq(4, eval('strwidth("Bär")'))
end) end)
end) end)

View File

@@ -8,7 +8,7 @@ local Session = require('nvim.session')
local nvim_prog = os.getenv('NVIM_PROG') or 'build/bin/nvim' local nvim_prog = os.getenv('NVIM_PROG') or 'build/bin/nvim'
local nvim_argv = {nvim_prog, '-u', 'NONE', '-i', 'NONE', '-N', local nvim_argv = {nvim_prog, '-u', 'NONE', '-i', 'NONE', '-N',
'--cmd', 'set shortmess+=I background=light noswapfile noautoindent laststatus=1 encoding=utf-8 undodir=. directory=. viewdir=. backupdir=.', '--cmd', 'set shortmess+=I background=light noswapfile noautoindent laststatus=1 undodir=. directory=. viewdir=. backupdir=.',
'--embed'} '--embed'}
-- Formulate a path to the directory containing nvim. We use this to -- Formulate a path to the directory containing nvim. We use this to