mirror of
https://github.com/neovim/neovim.git
synced 2025-11-04 17:54:30 +00:00
Merge pull request #3655 from bfredl/enc_default
Default to encoding=utf-8
This commit is contained in:
@@ -2130,7 +2130,7 @@ A jump table for the options with a short description can be found at |Q_op|.
|
|||||||
'edcompatible' 'ed' Removed. |vim-differences| {Nvim}
|
'edcompatible' 'ed' Removed. |vim-differences| {Nvim}
|
||||||
|
|
||||||
*'encoding'* *'enc'* *E543*
|
*'encoding'* *'enc'* *E543*
|
||||||
'encoding' 'enc' string (default: "utf-8" or value from $LANG)
|
'encoding' 'enc' string (default: "utf-8")
|
||||||
global
|
global
|
||||||
{only available when compiled with the |+multi_byte|
|
{only available when compiled with the |+multi_byte|
|
||||||
feature}
|
feature}
|
||||||
@@ -2152,10 +2152,6 @@ A jump table for the options with a short description can be found at |Q_op|.
|
|||||||
can use: >
|
can use: >
|
||||||
if has("multi_byte_encoding")
|
if has("multi_byte_encoding")
|
||||||
<
|
<
|
||||||
Normally 'encoding' will be equal to your current locale. This will
|
|
||||||
be the default if Vim recognizes your environment settings, otherwise
|
|
||||||
"utf-8" is used.
|
|
||||||
|
|
||||||
When you set this option, it fires the |EncodingChanged| autocommand
|
When you set this option, it fires the |EncodingChanged| autocommand
|
||||||
event so that you can set up fonts if necessary.
|
event so that you can set up fonts if necessary.
|
||||||
|
|
||||||
@@ -2172,9 +2168,6 @@ A jump table for the options with a short description can be found at |Q_op|.
|
|||||||
setting 'encoding' to one of these values instead of utf-8 only has
|
setting 'encoding' to one of these values instead of utf-8 only has
|
||||||
effect for encoding used for files when 'fileencoding' is empty.
|
effect for encoding used for files when 'fileencoding' is empty.
|
||||||
|
|
||||||
When 'encoding' is set to a Unicode encoding, and 'fileencodings' was
|
|
||||||
not set yet, the default for 'fileencodings' is changed.
|
|
||||||
|
|
||||||
*'endofline'* *'eol'* *'noendofline'* *'noeol'*
|
*'endofline'* *'eol'* *'noendofline'* *'noeol'*
|
||||||
'endofline' 'eol' boolean (default on)
|
'endofline' 'eol' boolean (default on)
|
||||||
local to buffer
|
local to buffer
|
||||||
@@ -2345,9 +2338,7 @@ A jump table for the options with a short description can be found at |Q_op|.
|
|||||||
old short name was 'fe', which is no longer used.
|
old short name was 'fe', which is no longer used.
|
||||||
|
|
||||||
*'fileencodings'* *'fencs'*
|
*'fileencodings'* *'fencs'*
|
||||||
'fileencodings' 'fencs' string (default: "ucs-bom",
|
'fileencodings' 'fencs' string (default: "ucs-bom,utf-8,default,latin1")
|
||||||
"ucs-bom,utf-8,default,latin1" when
|
|
||||||
'encoding' is set to a Unicode value)
|
|
||||||
global
|
global
|
||||||
{only available when compiled with the |+multi_byte|
|
{only available when compiled with the |+multi_byte|
|
||||||
feature}
|
feature}
|
||||||
@@ -2387,9 +2378,8 @@ A jump table for the options with a short description can be found at |Q_op|.
|
|||||||
because Vim cannot detect an error, thus the encoding is always
|
because Vim cannot detect an error, thus the encoding is always
|
||||||
accepted.
|
accepted.
|
||||||
The special value "default" can be used for the encoding from the
|
The special value "default" can be used for the encoding from the
|
||||||
environment. This is the default value for 'encoding'. It is useful
|
environment. It is useful when 'encoding' is set to "utf-8" and
|
||||||
when 'encoding' is set to "utf-8" and your environment uses a
|
your environment uses a non-latin1 encoding, such as Russian.
|
||||||
non-latin1 encoding, such as Russian.
|
|
||||||
When 'encoding' is "utf-8" and a file contains an illegal byte
|
When 'encoding' is "utf-8" and a file contains an illegal byte
|
||||||
sequence it won't be recognized as UTF-8. You can use the |8g8|
|
sequence it won't be recognized as UTF-8. You can use the |8g8|
|
||||||
command to find the illegal byte sequence.
|
command to find the illegal byte sequence.
|
||||||
@@ -3776,10 +3766,8 @@ A jump table for the options with a short description can be found at |Q_op|.
|
|||||||
change 'iskeyword' instead.
|
change 'iskeyword' instead.
|
||||||
|
|
||||||
*'iskeyword'* *'isk'*
|
*'iskeyword'* *'isk'*
|
||||||
'iskeyword' 'isk' string (Vim default for
|
'iskeyword' 'isk' string (default: @,48-57,_,192-255
|
||||||
Win32: @,48-57,_,128-167,224-235
|
Vi default: @,48-57,_)
|
||||||
otherwise: @,48-57,_,192-255
|
|
||||||
Vi default: @,48-57,_)
|
|
||||||
local to buffer
|
local to buffer
|
||||||
Keywords are used in searching and recognizing with many commands:
|
Keywords are used in searching and recognizing with many commands:
|
||||||
"w", "*", "[i", etc. It is also used for "\k" in a |pattern|. See
|
"w", "*", "[i", etc. It is also used for "\k" in a |pattern|. See
|
||||||
@@ -3791,8 +3779,7 @@ A jump table for the options with a short description can be found at |Q_op|.
|
|||||||
When the 'lisp' option is on the '-' character is always included.
|
When the 'lisp' option is on the '-' character is always included.
|
||||||
|
|
||||||
*'isprint'* *'isp'*
|
*'isprint'* *'isp'*
|
||||||
'isprint' 'isp' string (default for MS-DOS, Win32, and Macintosh:
|
'isprint' 'isp' string (default: "@,161-255")
|
||||||
"@,~-255"; otherwise: "@,161-255")
|
|
||||||
global
|
global
|
||||||
The characters given by this option are displayed directly on the
|
The characters given by this option are displayed directly on the
|
||||||
screen. It is also used for "\p" in a |pattern|. The characters from
|
screen. It is also used for "\p" in a |pattern|. The characters from
|
||||||
|
|||||||
@@ -798,6 +798,8 @@ EXTERN bool enc_utf8 INIT(= false); /* UTF-8 encoded Unicode */
|
|||||||
EXTERN int enc_latin1like INIT(= TRUE); /* 'encoding' is latin1 comp. */
|
EXTERN int enc_latin1like INIT(= TRUE); /* 'encoding' is latin1 comp. */
|
||||||
EXTERN int has_mbyte INIT(= 0); /* any multi-byte encoding */
|
EXTERN int has_mbyte INIT(= 0); /* any multi-byte encoding */
|
||||||
|
|
||||||
|
/// Encoding that the special name "default" resolves to, e.g. in 'fileencodings'
|
||||||
|
EXTERN char_u *fenc_default INIT(= NULL);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* To speed up BYTELEN() we fill a table with the byte lengths whenever
|
* To speed up BYTELEN() we fill a table with the byte lengths whenever
|
||||||
|
|||||||
@@ -568,11 +568,6 @@ char_u * mb_init(void)
|
|||||||
/* When enc_utf8 is set or reset, (de)allocate ScreenLinesUC[] */
|
/* When enc_utf8 is set or reset, (de)allocate ScreenLinesUC[] */
|
||||||
screenalloc(false);
|
screenalloc(false);
|
||||||
|
|
||||||
/* When using Unicode, set default for 'fileencodings'. */
|
|
||||||
if (enc_utf8 && !option_was_set((char_u *)"fencs"))
|
|
||||||
set_string_option_direct((char_u *)"fencs", -1,
|
|
||||||
(char_u *)"ucs-bom,utf-8,default,latin1", OPT_FREE, 0);
|
|
||||||
|
|
||||||
#ifdef HAVE_WORKING_LIBINTL
|
#ifdef HAVE_WORKING_LIBINTL
|
||||||
/* GNU gettext 0.10.37 supports this feature: set the codeset used for
|
/* GNU gettext 0.10.37 supports this feature: set the codeset used for
|
||||||
* translated messages independently from the current locale. */
|
* translated messages independently from the current locale. */
|
||||||
@@ -2417,11 +2412,8 @@ char_u *enc_canonize(char_u *enc) FUNC_ATTR_NONNULL_RET
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (STRCMP(enc, "default") == 0) {
|
if (STRCMP(enc, "default") == 0) {
|
||||||
/* Use the default encoding as it's found by set_init_1(). */
|
// Use the default encoding as found by set_init_1().
|
||||||
char_u *r = get_encoding_default();
|
return vim_strsave(fenc_default);
|
||||||
if (r == NULL)
|
|
||||||
r = (char_u *)"latin1";
|
|
||||||
return vim_strsave(r);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* copy "enc" to allocated memory, with room for two '-' */
|
/* copy "enc" to allocated memory, with room for two '-' */
|
||||||
|
|||||||
@@ -233,12 +233,6 @@ typedef struct vimoption {
|
|||||||
#define P_CURSWANT 0x2000000U /* update curswant required; not needed when
|
#define P_CURSWANT 0x2000000U /* update curswant required; not needed when
|
||||||
* there is a redraw flag */
|
* there is a redraw flag */
|
||||||
|
|
||||||
#define ISK_LATIN1 (char_u *)"@,48-57,_,192-255"
|
|
||||||
|
|
||||||
/* 'isprint' for latin1 is also used for MS-Windows cp1252, where 0x80 is used
|
|
||||||
* for the currency sign. */
|
|
||||||
# define ISP_LATIN1 (char_u *)"@,161-255"
|
|
||||||
|
|
||||||
#define HIGHLIGHT_INIT \
|
#define HIGHLIGHT_INIT \
|
||||||
"8:SpecialKey,~:EndOfBuffer,z:TermCursor,Z:TermCursorNC,@:NonText," \
|
"8:SpecialKey,~:EndOfBuffer,z:TermCursor,Z:TermCursorNC,@:NonText," \
|
||||||
"d:Directory,e:ErrorMsg,i:IncSearch,l:Search,m:MoreMsg,M:ModeMsg,n:LineNr," \
|
"d:Directory,e:ErrorMsg,i:IncSearch,l:Search,m:MoreMsg,M:ModeMsg,n:LineNr," \
|
||||||
@@ -776,59 +770,18 @@ void set_init_1(void)
|
|||||||
/* Parse default for 'listchars'. */
|
/* Parse default for 'listchars'. */
|
||||||
(void)set_chars_option(&p_lcs);
|
(void)set_chars_option(&p_lcs);
|
||||||
|
|
||||||
/* enc_locale() will try to find the encoding of the current locale. */
|
// enc_locale() will try to find the encoding of the current locale.
|
||||||
|
// This will be used when 'default' is used as encoding specifier
|
||||||
|
// in 'fileencodings'
|
||||||
char_u *p = enc_locale();
|
char_u *p = enc_locale();
|
||||||
if (p != NULL) {
|
if (p == NULL) {
|
||||||
char_u *save_enc;
|
// use utf-8 as 'default' if locale encoding can't be detected.
|
||||||
|
p = vim_strsave((char_u *)"utf-8");
|
||||||
/* Try setting 'encoding' and check if the value is valid.
|
|
||||||
* If not, go back to the default "utf-8". */
|
|
||||||
save_enc = p_enc;
|
|
||||||
p_enc = (char_u *) p;
|
|
||||||
if (STRCMP(p_enc, "gb18030") == 0) {
|
|
||||||
/* We don't support "gb18030", but "cp936" is a good substitute
|
|
||||||
* for practical purposes, thus use that. It's not an alias to
|
|
||||||
* still support conversion between gb18030 and utf-8. */
|
|
||||||
p_enc = vim_strsave((char_u *)"cp936");
|
|
||||||
xfree(p);
|
|
||||||
}
|
|
||||||
if (mb_init() == NULL) {
|
|
||||||
opt_idx = findoption((char_u *)"encoding");
|
|
||||||
if (opt_idx >= 0) {
|
|
||||||
options[opt_idx].def_val[VI_DEFAULT] = p_enc;
|
|
||||||
options[opt_idx].flags |= P_DEF_ALLOCED;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(MSWIN) || defined(MACOS)
|
|
||||||
if (STRCMP(p_enc, "latin1") == 0
|
|
||||||
|| enc_utf8
|
|
||||||
) {
|
|
||||||
/* Adjust the default for 'isprint' and 'iskeyword' to match
|
|
||||||
* latin1. */
|
|
||||||
set_string_option_direct((char_u *)"isp", -1,
|
|
||||||
ISP_LATIN1, OPT_FREE, SID_NONE);
|
|
||||||
set_string_option_direct((char_u *)"isk", -1,
|
|
||||||
ISK_LATIN1, OPT_FREE, SID_NONE);
|
|
||||||
opt_idx = findoption((char_u *)"isp");
|
|
||||||
if (opt_idx >= 0)
|
|
||||||
options[opt_idx].def_val[VIM_DEFAULT] = ISP_LATIN1;
|
|
||||||
opt_idx = findoption((char_u *)"isk");
|
|
||||||
if (opt_idx >= 0)
|
|
||||||
options[opt_idx].def_val[VIM_DEFAULT] = ISK_LATIN1;
|
|
||||||
(void)init_chartab();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} else {
|
|
||||||
xfree(p_enc);
|
|
||||||
// mb_init() failed; fallback to utf8 and try again.
|
|
||||||
p_enc = save_enc;
|
|
||||||
mb_init();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// enc_locale() failed; initialize the default (utf8).
|
|
||||||
mb_init();
|
|
||||||
}
|
}
|
||||||
|
fenc_default = p;
|
||||||
|
|
||||||
|
// Initialize multibyte (utf-8) handling
|
||||||
|
mb_init();
|
||||||
|
|
||||||
// Don't change &encoding when resetting to defaults with ":set all&".
|
// Don't change &encoding when resetting to defaults with ":set all&".
|
||||||
opt_idx = findoption((char_u *)"encoding");
|
opt_idx = findoption((char_u *)"encoding");
|
||||||
@@ -4669,16 +4622,6 @@ char_u *get_highlight_default(void)
|
|||||||
return (char_u *)NULL;
|
return (char_u *)NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
char_u *get_encoding_default(void)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
i = findoption((char_u *)"enc");
|
|
||||||
if (i >= 0)
|
|
||||||
return options[i].def_val[VI_DEFAULT];
|
|
||||||
return (char_u *)NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Translate a string like "t_xx", "<t_xx>" or "<S-Tab>" to a key number.
|
* Translate a string like "t_xx", "<t_xx>" or "<S-Tab>" to a key number.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -748,7 +748,7 @@ return {
|
|||||||
type='string', list='comma', scope={'global'},
|
type='string', list='comma', scope={'global'},
|
||||||
vi_def=true,
|
vi_def=true,
|
||||||
varname='p_fencs',
|
varname='p_fencs',
|
||||||
defaults={if_true={vi="ucs-bom"}}
|
defaults={if_true={vi="ucs-bom,utf-8,default,latin1"}}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
full_name='fileformat', abbreviation='ff',
|
full_name='fileformat', abbreviation='ff',
|
||||||
@@ -1285,7 +1285,7 @@ return {
|
|||||||
vim=true,
|
vim=true,
|
||||||
alloced=true,
|
alloced=true,
|
||||||
varname='p_isk',
|
varname='p_isk',
|
||||||
defaults={if_true={vi="@,48-57,_", vim=macros('ISK_LATIN1')}}
|
defaults={if_true={vi="@,48-57,_", vim="@,48-57,_,192-255"}}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
full_name='isprint', abbreviation='isp',
|
full_name='isprint', abbreviation='isp',
|
||||||
@@ -1294,10 +1294,7 @@ return {
|
|||||||
vi_def=true,
|
vi_def=true,
|
||||||
redraw={'all_windows'},
|
redraw={'all_windows'},
|
||||||
varname='p_isp',
|
varname='p_isp',
|
||||||
defaults={
|
defaults={if_true={vi="@,161-255"}
|
||||||
condition='MSWIN',
|
|
||||||
if_true={vi="@,~-255"},
|
|
||||||
if_false={vi=macros("ISP_LATIN1")}
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -21,20 +21,21 @@ describe('&encoding', function()
|
|||||||
eq(3, eval('strwidth("Bär")'))
|
eq(3, eval('strwidth("Bär")'))
|
||||||
end)
|
end)
|
||||||
|
|
||||||
it('is not changed by `set all&`', function()
|
it('can be changed before startup', function()
|
||||||
-- we need to set &encoding to something non-default
|
clear('set enc=latin1')
|
||||||
-- use 'latin1' when enc&vi is 'utf-8', 'utf-8' otherwise
|
execute('set encoding=utf-8')
|
||||||
execute('set fenc=default')
|
-- setting 'encoding' after startup must not take effect; an error message is expected
|
||||||
local enc_default, enc_other, width = eval('&fenc'), 'utf-8', 3
|
feed('<cr>')
|
||||||
if enc_default == 'utf-8' then
|
eq('latin1', eval('&encoding'))
|
||||||
enc_other = 'latin1'
|
eq(4, eval('strwidth("Bär")'))
|
||||||
width = 4 -- utf-8 string 'Bär' will count as 4 latin1 chars
|
end)
|
||||||
end
|
|
||||||
|
|
||||||
clear('set enc=' .. enc_other)
|
it('is not changed by `set all&`', function()
|
||||||
|
-- we need to set &encoding to something non-default. Use 'latin1'
|
||||||
|
clear('set enc=latin1')
|
||||||
execute('set all&')
|
execute('set all&')
|
||||||
eq(enc_other, eval('&encoding'))
|
eq('latin1', eval('&encoding'))
|
||||||
eq(width, eval('strwidth("Bär")'))
|
eq(4, eval('strwidth("Bär")'))
|
||||||
end)
|
end)
|
||||||
|
|
||||||
end)
|
end)
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ local Session = require('nvim.session')
|
|||||||
|
|
||||||
local nvim_prog = os.getenv('NVIM_PROG') or 'build/bin/nvim'
|
local nvim_prog = os.getenv('NVIM_PROG') or 'build/bin/nvim'
|
||||||
local nvim_argv = {nvim_prog, '-u', 'NONE', '-i', 'NONE', '-N',
|
local nvim_argv = {nvim_prog, '-u', 'NONE', '-i', 'NONE', '-N',
|
||||||
'--cmd', 'set shortmess+=I background=light noswapfile noautoindent laststatus=1 encoding=utf-8 undodir=. directory=. viewdir=. backupdir=.',
|
'--cmd', 'set shortmess+=I background=light noswapfile noautoindent laststatus=1 undodir=. directory=. viewdir=. backupdir=.',
|
||||||
'--embed'}
|
'--embed'}
|
||||||
|
|
||||||
-- Formulate a path to the directory containing nvim. We use this to
|
-- Formulate a path to the directory containing nvim. We use this to
|
||||||
|
|||||||
Reference in New Issue
Block a user