mirror of
				https://github.com/neovim/neovim.git
				synced 2025-11-04 09:44:31 +00:00 
			
		
		
		
	Merge pull request #3655 from bfredl/enc_default
Default to encoding=utf-8
This commit is contained in:
		@@ -2130,7 +2130,7 @@ A jump table for the options with a short description can be found at |Q_op|.
 | 
			
		||||
'edcompatible' 'ed'	Removed. |vim-differences| {Nvim}
 | 
			
		||||
 | 
			
		||||
					*'encoding'* *'enc'* *E543*
 | 
			
		||||
'encoding' 'enc'	string (default: "utf-8" or value from $LANG)
 | 
			
		||||
'encoding' 'enc'	string (default: "utf-8")
 | 
			
		||||
			global
 | 
			
		||||
			{only available when compiled with the |+multi_byte|
 | 
			
		||||
			feature}
 | 
			
		||||
@@ -2152,10 +2152,6 @@ A jump table for the options with a short description can be found at |Q_op|.
 | 
			
		||||
	can use: >
 | 
			
		||||
		if has("multi_byte_encoding")
 | 
			
		||||
<
 | 
			
		||||
	Normally 'encoding' will be equal to your current locale.  This will
 | 
			
		||||
	be the default if Vim recognizes your environment settings, otherwise
 | 
			
		||||
	"utf-8" is used.
 | 
			
		||||
 | 
			
		||||
	When you set this option, it fires the |EncodingChanged| autocommand
 | 
			
		||||
	event so that you can set up fonts if necessary.
 | 
			
		||||
 | 
			
		||||
@@ -2172,9 +2168,6 @@ A jump table for the options with a short description can be found at |Q_op|.
 | 
			
		||||
	setting 'encoding' to one of these values instead of utf-8 only has
 | 
			
		||||
	effect for encoding used for files when 'fileencoding' is empty.
 | 
			
		||||
 | 
			
		||||
	When 'encoding' is set to a Unicode encoding, and 'fileencodings' was
 | 
			
		||||
	not set yet, the default for 'fileencodings' is changed.
 | 
			
		||||
 | 
			
		||||
			*'endofline'* *'eol'* *'noendofline'* *'noeol'*
 | 
			
		||||
'endofline' 'eol'	boolean	(default on)
 | 
			
		||||
			local to buffer
 | 
			
		||||
@@ -2345,9 +2338,7 @@ A jump table for the options with a short description can be found at |Q_op|.
 | 
			
		||||
	old short name was 'fe', which is no longer used.
 | 
			
		||||
 | 
			
		||||
					*'fileencodings'* *'fencs'*
 | 
			
		||||
'fileencodings' 'fencs'	string (default: "ucs-bom",
 | 
			
		||||
				    "ucs-bom,utf-8,default,latin1" when
 | 
			
		||||
				    'encoding' is set to a Unicode value)
 | 
			
		||||
'fileencodings' 'fencs'	string (default: "ucs-bom,utf-8,default,latin1")
 | 
			
		||||
			global
 | 
			
		||||
			{only available when compiled with the |+multi_byte|
 | 
			
		||||
			feature}
 | 
			
		||||
@@ -2387,9 +2378,8 @@ A jump table for the options with a short description can be found at |Q_op|.
 | 
			
		||||
	because Vim cannot detect an error, thus the encoding is always
 | 
			
		||||
	accepted.
 | 
			
		||||
	The special value "default" can be used for the encoding from the
 | 
			
		||||
	environment.  This is the default value for 'encoding'.  It is useful
 | 
			
		||||
	when 'encoding' is set to "utf-8" and your environment uses a
 | 
			
		||||
	non-latin1 encoding, such as Russian.
 | 
			
		||||
	environment.  It is useful when 'encoding' is set to "utf-8" and
 | 
			
		||||
	your environment uses a non-latin1 encoding, such as Russian.
 | 
			
		||||
	When 'encoding' is "utf-8" and a file contains an illegal byte
 | 
			
		||||
	sequence it won't be recognized as UTF-8.  You can use the |8g8|
 | 
			
		||||
	command to find the illegal byte sequence.
 | 
			
		||||
@@ -3776,9 +3766,7 @@ A jump table for the options with a short description can be found at |Q_op|.
 | 
			
		||||
	change 'iskeyword' instead.
 | 
			
		||||
 | 
			
		||||
						*'iskeyword'* *'isk'*
 | 
			
		||||
'iskeyword' 'isk'	string (Vim default for
 | 
			
		||||
				  Win32:     @,48-57,_,128-167,224-235
 | 
			
		||||
				  otherwise: @,48-57,_,192-255
 | 
			
		||||
'iskeyword' 'isk'	string (default: @,48-57,_,192-255
 | 
			
		||||
				Vi default: @,48-57,_)
 | 
			
		||||
			local to buffer
 | 
			
		||||
	Keywords are used in searching and recognizing with many commands:
 | 
			
		||||
@@ -3791,8 +3779,7 @@ A jump table for the options with a short description can be found at |Q_op|.
 | 
			
		||||
	When the 'lisp' option is on the '-' character is always included.
 | 
			
		||||
 | 
			
		||||
						*'isprint'* *'isp'*
 | 
			
		||||
'isprint' 'isp'	string	(default for MS-DOS, Win32, and Macintosh:
 | 
			
		||||
				"@,~-255"; otherwise: "@,161-255")
 | 
			
		||||
'isprint' 'isp'	string	(default: "@,161-255")
 | 
			
		||||
			global
 | 
			
		||||
	The characters given by this option are displayed directly on the
 | 
			
		||||
	screen.  It is also used for "\p" in a |pattern|.  The characters from
 | 
			
		||||
 
 | 
			
		||||
@@ -798,6 +798,8 @@ EXTERN bool enc_utf8 INIT(= false);             /* UTF-8 encoded Unicode */
 | 
			
		||||
EXTERN int enc_latin1like INIT(= TRUE);         /* 'encoding' is latin1 comp. */
 | 
			
		||||
EXTERN int has_mbyte INIT(= 0);                 /* any multi-byte encoding */
 | 
			
		||||
 | 
			
		||||
/// Encoding used when 'fencs' is set to "default"
 | 
			
		||||
EXTERN char_u *fenc_default INIT(= NULL);
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * To speed up BYTELEN() we fill a table with the byte lengths whenever
 | 
			
		||||
 
 | 
			
		||||
@@ -568,11 +568,6 @@ char_u * mb_init(void)
 | 
			
		||||
  /* When enc_utf8 is set or reset, (de)allocate ScreenLinesUC[] */
 | 
			
		||||
  screenalloc(false);
 | 
			
		||||
 | 
			
		||||
  /* When using Unicode, set default for 'fileencodings'. */
 | 
			
		||||
  if (enc_utf8 && !option_was_set((char_u *)"fencs"))
 | 
			
		||||
    set_string_option_direct((char_u *)"fencs", -1,
 | 
			
		||||
        (char_u *)"ucs-bom,utf-8,default,latin1", OPT_FREE, 0);
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_WORKING_LIBINTL
 | 
			
		||||
  /* GNU gettext 0.10.37 supports this feature: set the codeset used for
 | 
			
		||||
   * translated messages independently from the current locale. */
 | 
			
		||||
@@ -2417,11 +2412,8 @@ char_u *enc_canonize(char_u *enc) FUNC_ATTR_NONNULL_RET
 | 
			
		||||
  int i;
 | 
			
		||||
 | 
			
		||||
  if (STRCMP(enc, "default") == 0) {
 | 
			
		||||
    /* Use the default encoding as it's found by set_init_1(). */
 | 
			
		||||
    char_u *r = get_encoding_default();
 | 
			
		||||
    if (r == NULL)
 | 
			
		||||
      r = (char_u *)"latin1";
 | 
			
		||||
    return vim_strsave(r);
 | 
			
		||||
    // Use the default encoding as found by set_init_1().
 | 
			
		||||
    return vim_strsave(fenc_default);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /* copy "enc" to allocated memory, with room for two '-' */
 | 
			
		||||
 
 | 
			
		||||
@@ -233,12 +233,6 @@ typedef struct vimoption {
 | 
			
		||||
#define P_CURSWANT    0x2000000U /* update curswant required; not needed when
 | 
			
		||||
                                  * there is a redraw flag */
 | 
			
		||||
 | 
			
		||||
#define ISK_LATIN1  (char_u *)"@,48-57,_,192-255"
 | 
			
		||||
 | 
			
		||||
/* 'isprint' for latin1 is also used for MS-Windows cp1252, where 0x80 is used
 | 
			
		||||
 * for the currency sign. */
 | 
			
		||||
# define ISP_LATIN1 (char_u *)"@,161-255"
 | 
			
		||||
 | 
			
		||||
#define HIGHLIGHT_INIT \
 | 
			
		||||
  "8:SpecialKey,~:EndOfBuffer,z:TermCursor,Z:TermCursorNC,@:NonText," \
 | 
			
		||||
  "d:Directory,e:ErrorMsg,i:IncSearch,l:Search,m:MoreMsg,M:ModeMsg,n:LineNr," \
 | 
			
		||||
@@ -776,59 +770,18 @@ void set_init_1(void)
 | 
			
		||||
  /* Parse default for 'listchars'. */
 | 
			
		||||
  (void)set_chars_option(&p_lcs);
 | 
			
		||||
 | 
			
		||||
  /* enc_locale() will try to find the encoding of the current locale. */
 | 
			
		||||
  // enc_locale() will try to find the encoding of the current locale.
 | 
			
		||||
  // This will be used when 'default' is used as encoding specifier
 | 
			
		||||
  // in 'fileencodings'
 | 
			
		||||
  char_u *p = enc_locale();
 | 
			
		||||
  if (p != NULL) {
 | 
			
		||||
    char_u *save_enc;
 | 
			
		||||
 | 
			
		||||
    /* Try setting 'encoding' and check if the value is valid.
 | 
			
		||||
     * If not, go back to the default "utf-8". */
 | 
			
		||||
    save_enc = p_enc;
 | 
			
		||||
    p_enc = (char_u *) p;
 | 
			
		||||
    if (STRCMP(p_enc, "gb18030") == 0) {
 | 
			
		||||
      /* We don't support "gb18030", but "cp936" is a good substitute
 | 
			
		||||
       * for practical purposes, thus use that.  It's not an alias to
 | 
			
		||||
       * still support conversion between gb18030 and utf-8. */
 | 
			
		||||
      p_enc = vim_strsave((char_u *)"cp936");
 | 
			
		||||
      xfree(p);
 | 
			
		||||
    }
 | 
			
		||||
    if (mb_init() == NULL) {
 | 
			
		||||
      opt_idx = findoption((char_u *)"encoding");
 | 
			
		||||
      if (opt_idx >= 0) {
 | 
			
		||||
        options[opt_idx].def_val[VI_DEFAULT] = p_enc;
 | 
			
		||||
        options[opt_idx].flags |= P_DEF_ALLOCED;
 | 
			
		||||
  if (p == NULL) {
 | 
			
		||||
      // use utf-8 as 'default' if locale encoding can't be detected.
 | 
			
		||||
      p = vim_strsave((char_u *)"utf-8");
 | 
			
		||||
  }
 | 
			
		||||
  fenc_default = p;
 | 
			
		||||
 | 
			
		||||
#if defined(MSWIN) || defined(MACOS)
 | 
			
		||||
      if (STRCMP(p_enc, "latin1") == 0
 | 
			
		||||
          || enc_utf8
 | 
			
		||||
          ) {
 | 
			
		||||
        /* Adjust the default for 'isprint' and 'iskeyword' to match
 | 
			
		||||
         * latin1. */
 | 
			
		||||
        set_string_option_direct((char_u *)"isp", -1,
 | 
			
		||||
            ISP_LATIN1, OPT_FREE, SID_NONE);
 | 
			
		||||
        set_string_option_direct((char_u *)"isk", -1,
 | 
			
		||||
            ISK_LATIN1, OPT_FREE, SID_NONE);
 | 
			
		||||
        opt_idx = findoption((char_u *)"isp");
 | 
			
		||||
        if (opt_idx >= 0)
 | 
			
		||||
          options[opt_idx].def_val[VIM_DEFAULT] = ISP_LATIN1;
 | 
			
		||||
        opt_idx = findoption((char_u *)"isk");
 | 
			
		||||
        if (opt_idx >= 0)
 | 
			
		||||
          options[opt_idx].def_val[VIM_DEFAULT] = ISK_LATIN1;
 | 
			
		||||
        (void)init_chartab();
 | 
			
		||||
      }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    } else {
 | 
			
		||||
      xfree(p_enc);
 | 
			
		||||
      // mb_init() failed; fallback to utf8 and try again.
 | 
			
		||||
      p_enc = save_enc;
 | 
			
		||||
  // Initialize multibyte (utf-8) handling
 | 
			
		||||
  mb_init();
 | 
			
		||||
    }
 | 
			
		||||
  } else {
 | 
			
		||||
    // enc_locale() failed; initialize the default (utf8).
 | 
			
		||||
    mb_init();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Don't change &encoding when resetting to defaults with ":set all&".
 | 
			
		||||
  opt_idx = findoption((char_u *)"encoding");
 | 
			
		||||
@@ -4669,16 +4622,6 @@ char_u *get_highlight_default(void)
 | 
			
		||||
  return (char_u *)NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
char_u *get_encoding_default(void)
 | 
			
		||||
{
 | 
			
		||||
  int i;
 | 
			
		||||
 | 
			
		||||
  i = findoption((char_u *)"enc");
 | 
			
		||||
  if (i >= 0)
 | 
			
		||||
    return options[i].def_val[VI_DEFAULT];
 | 
			
		||||
  return (char_u *)NULL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Translate a string like "t_xx", "<t_xx>" or "<S-Tab>" to a key number.
 | 
			
		||||
 */
 | 
			
		||||
 
 | 
			
		||||
@@ -748,7 +748,7 @@ return {
 | 
			
		||||
      type='string', list='comma', scope={'global'},
 | 
			
		||||
      vi_def=true,
 | 
			
		||||
      varname='p_fencs',
 | 
			
		||||
      defaults={if_true={vi="ucs-bom"}}
 | 
			
		||||
      defaults={if_true={vi="ucs-bom,utf-8,default,latin1"}}
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      full_name='fileformat', abbreviation='ff',
 | 
			
		||||
@@ -1285,7 +1285,7 @@ return {
 | 
			
		||||
      vim=true,
 | 
			
		||||
      alloced=true,
 | 
			
		||||
      varname='p_isk',
 | 
			
		||||
      defaults={if_true={vi="@,48-57,_", vim=macros('ISK_LATIN1')}}
 | 
			
		||||
      defaults={if_true={vi="@,48-57,_", vim="@,48-57,_,192-255"}}
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
      full_name='isprint', abbreviation='isp',
 | 
			
		||||
@@ -1294,10 +1294,7 @@ return {
 | 
			
		||||
      vi_def=true,
 | 
			
		||||
      redraw={'all_windows'},
 | 
			
		||||
      varname='p_isp',
 | 
			
		||||
      defaults={
 | 
			
		||||
        condition='MSWIN',
 | 
			
		||||
        if_true={vi="@,~-255"},
 | 
			
		||||
        if_false={vi=macros("ISP_LATIN1")}
 | 
			
		||||
      defaults={if_true={vi="@,161-255"}
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
 
 | 
			
		||||
@@ -21,20 +21,21 @@ describe('&encoding', function()
 | 
			
		||||
    eq(3, eval('strwidth("Bär")'))
 | 
			
		||||
  end)
 | 
			
		||||
 | 
			
		||||
  it('can be changed before startup', function()
 | 
			
		||||
    clear('set enc=latin1')
 | 
			
		||||
    execute('set encoding=utf-8')
 | 
			
		||||
    -- error message expected
 | 
			
		||||
    feed('<cr>')
 | 
			
		||||
    eq('latin1', eval('&encoding'))
 | 
			
		||||
    eq(4, eval('strwidth("Bär")'))
 | 
			
		||||
  end)
 | 
			
		||||
 | 
			
		||||
  it('is not changed by `set all&`', function()
 | 
			
		||||
    -- we need to set &encoding to something non-default
 | 
			
		||||
    -- use 'latin1' when enc&vi is 'utf-8', 'utf-8' otherwise
 | 
			
		||||
    execute('set fenc=default')
 | 
			
		||||
    local enc_default, enc_other, width = eval('&fenc'), 'utf-8', 3
 | 
			
		||||
    if enc_default == 'utf-8' then
 | 
			
		||||
      enc_other = 'latin1'
 | 
			
		||||
      width = 4 -- utf-8 string 'Bär' will count as 4 latin1 chars
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    clear('set enc=' .. enc_other)
 | 
			
		||||
    -- we need to set &encoding to something non-default. Use 'latin1'
 | 
			
		||||
    clear('set enc=latin1')
 | 
			
		||||
    execute('set all&')
 | 
			
		||||
    eq(enc_other, eval('&encoding'))
 | 
			
		||||
    eq(width, eval('strwidth("Bär")'))
 | 
			
		||||
    eq('latin1', eval('&encoding'))
 | 
			
		||||
    eq(4, eval('strwidth("Bär")'))
 | 
			
		||||
  end)
 | 
			
		||||
 | 
			
		||||
end)
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ local Session = require('nvim.session')
 | 
			
		||||
 | 
			
		||||
local nvim_prog = os.getenv('NVIM_PROG') or 'build/bin/nvim'
 | 
			
		||||
local nvim_argv = {nvim_prog, '-u', 'NONE', '-i', 'NONE', '-N',
 | 
			
		||||
                   '--cmd', 'set shortmess+=I background=light noswapfile noautoindent laststatus=1 encoding=utf-8 undodir=. directory=. viewdir=. backupdir=.',
 | 
			
		||||
                   '--cmd', 'set shortmess+=I background=light noswapfile noautoindent laststatus=1 undodir=. directory=. viewdir=. backupdir=.',
 | 
			
		||||
                   '--embed'}
 | 
			
		||||
 | 
			
		||||
-- Formulate a path to the directory containing nvim.  We use this to
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user