mirror of
				https://github.com/neovim/neovim.git
				synced 2025-11-04 01:34:25 +00:00 
			
		
		
		
	eval/decode: Fail on control and invalid unicode characters
This commit is contained in:
		@@ -264,8 +264,8 @@ int json_decode_string(const char *const buf, const size_t len,
 | 
				
			|||||||
      }
 | 
					      }
 | 
				
			||||||
      case '"': {
 | 
					      case '"': {
 | 
				
			||||||
        size_t len = 0;
 | 
					        size_t len = 0;
 | 
				
			||||||
        const char *s;
 | 
					        const char *const s = ++p;
 | 
				
			||||||
        for (s = ++p; p < e && *p != '"'; p++) {
 | 
					        while (p < e && *p != '"') {
 | 
				
			||||||
          if (*p == '\\') {
 | 
					          if (*p == '\\') {
 | 
				
			||||||
            p++;
 | 
					            p++;
 | 
				
			||||||
            if (p == e) {
 | 
					            if (p == e) {
 | 
				
			||||||
@@ -285,9 +285,10 @@ int json_decode_string(const char *const buf, const size_t len,
 | 
				
			|||||||
                        p - 1);
 | 
					                        p - 1);
 | 
				
			||||||
                  goto json_decode_string_fail;
 | 
					                  goto json_decode_string_fail;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                // One UTF-8 character below U+10000 can take up to 3 bytes
 | 
					                // One UTF-8 character below U+10000 can take up to 3 bytes,
 | 
				
			||||||
 | 
					                // above up to 6, but they are encoded using two \u escapes.
 | 
				
			||||||
                len += 3;
 | 
					                len += 3;
 | 
				
			||||||
                p += 4;
 | 
					                p += 5;
 | 
				
			||||||
                break;
 | 
					                break;
 | 
				
			||||||
              }
 | 
					              }
 | 
				
			||||||
              case '\\':
 | 
					              case '\\':
 | 
				
			||||||
@@ -299,6 +300,7 @@ int json_decode_string(const char *const buf, const size_t len,
 | 
				
			|||||||
              case 'r':
 | 
					              case 'r':
 | 
				
			||||||
              case 'f': {
 | 
					              case 'f': {
 | 
				
			||||||
                len++;
 | 
					                len++;
 | 
				
			||||||
 | 
					                p++;
 | 
				
			||||||
                break;
 | 
					                break;
 | 
				
			||||||
              }
 | 
					              }
 | 
				
			||||||
              default: {
 | 
					              default: {
 | 
				
			||||||
@@ -307,7 +309,30 @@ int json_decode_string(const char *const buf, const size_t len,
 | 
				
			|||||||
              }
 | 
					              }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
          } else {
 | 
					          } else {
 | 
				
			||||||
            len++;
 | 
					            uint8_t p_byte = (uint8_t) *p;
 | 
				
			||||||
 | 
					            // unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
 | 
				
			||||||
 | 
					            if (p_byte < 0x20) {
 | 
				
			||||||
 | 
					              EMSG2(_("E474: ASCII control characters cannot be present "
 | 
				
			||||||
 | 
					                      "inside string: %s"), p);
 | 
				
			||||||
 | 
					              goto json_decode_string_fail;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            const int ch = utf_ptr2char((char_u *) p);
 | 
				
			||||||
 | 
					            // All characters above U+007F are encoded using two or more bytes
 | 
				
			||||||
 | 
					            // and thus cannot possibly be equal to *p. But utf_ptr2char({0xFF,
 | 
				
			||||||
 | 
					            // 0}) will return 0xFF, even though 0xFF cannot start any UTF-8
 | 
				
			||||||
 | 
					            // code point at all.
 | 
				
			||||||
 | 
					            if (ch >= 0x80 && p_byte == ch) {
 | 
				
			||||||
 | 
					              EMSG2(_("E474: Only UTF-8 strings allowed: %s"), p);
 | 
				
			||||||
 | 
					              goto json_decode_string_fail;
 | 
				
			||||||
 | 
					            } else if (ch > 0x10FFFF) {
 | 
				
			||||||
 | 
					              EMSG2(_("E474: Only UTF-8 code points up to U+10FFFF "
 | 
				
			||||||
 | 
					                      "are allowed to appear unescaped: %s"), p);
 | 
				
			||||||
 | 
					              goto json_decode_string_fail;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            const size_t ch_len = (size_t) utf_char2len(ch);
 | 
				
			||||||
 | 
					            assert(ch_len == (size_t) (ch ? utf_ptr2len((char_u *) p) : 1));
 | 
				
			||||||
 | 
					            len += ch_len;
 | 
				
			||||||
 | 
					            p += ch_len;
 | 
				
			||||||
          }
 | 
					          }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        if (*p != '"') {
 | 
					        if (*p != '"') {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -235,6 +235,67 @@ describe('jsondecode() function', function()
 | 
				
			|||||||
    eq('', funcs.jsondecode('""'))
 | 
					    eq('', funcs.jsondecode('""'))
 | 
				
			||||||
    eq('\\/"\t\b\n\r\f', funcs.jsondecode([["\\\/\"\t\b\n\r\f"]]))
 | 
					    eq('\\/"\t\b\n\r\f', funcs.jsondecode([["\\\/\"\t\b\n\r\f"]]))
 | 
				
			||||||
    eq('/a', funcs.jsondecode([["\/a"]]))
 | 
					    eq('/a', funcs.jsondecode([["\/a"]]))
 | 
				
			||||||
 | 
					    -- Unicode characters: 2-byte, 3-byte, 4-byte
 | 
				
			||||||
 | 
					    eq({
 | 
				
			||||||
 | 
					      '«',
 | 
				
			||||||
 | 
					      'ફ',
 | 
				
			||||||
 | 
					      '\xF0\x90\x80\x80',
 | 
				
			||||||
 | 
					    }, funcs.jsondecode({
 | 
				
			||||||
 | 
					      '[',
 | 
				
			||||||
 | 
					      '"«",',
 | 
				
			||||||
 | 
					      '"ફ",',
 | 
				
			||||||
 | 
					      '"\xF0\x90\x80\x80"',
 | 
				
			||||||
 | 
					      ']',
 | 
				
			||||||
 | 
					    }))
 | 
				
			||||||
 | 
					  end)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  it('fails on strings with invalid bytes', function()
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \255"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xFF\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: ASCII control characters cannot be present inside string: ',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode(["\\"\\n\\""])'))
 | 
				
			||||||
 | 
					    -- 0xC2 starts 2-byte unicode character
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \194"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xC2\\"")'))
 | 
				
			||||||
 | 
					    -- 0xE0 0xAA starts 3-byte unicode character
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \224"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xE0\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \224\170"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xE0\\xAA\\"")'))
 | 
				
			||||||
 | 
					    -- 0xF0 0x90 0x80 starts 4-byte unicode character
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \240"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xF0\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \240\144"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xF0\\x90\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \240\144\128"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xF0\\x90\\x80\\"")'))
 | 
				
			||||||
 | 
					    -- 0xF9 0x80 0x80 0x80 starts 5-byte unicode character
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xF9\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80\x80"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \xF9\x80\x80\x80"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\x80\\"")'))
 | 
				
			||||||
 | 
					    -- 0xFC 0x90 0x80 0x80 0x80 starts 6-byte unicode character
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xFC\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80\x80"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 strings allowed: \xFC\x90\x80\x80\x80"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\x80\\"")'))
 | 
				
			||||||
 | 
					    -- Specification does not allow unescaped characters above 0x10FFFF
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 code points up to U+10FFFF are allowed to appear unescaped: \xF9\x80\x80\x80\x80"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xF9\\x80\\x80\\x80\\x80\\"")'))
 | 
				
			||||||
 | 
					    eq('Vim(call):E474: Only UTF-8 code points up to U+10FFFF are allowed to appear unescaped: \xFC\x90\x80\x80\x80\x80"',
 | 
				
			||||||
 | 
					       exc_exec('call jsondecode("\\t\\"\\xFC\\x90\\x80\\x80\\x80\\x80\\"")'))
 | 
				
			||||||
 | 
					    -- '"\xF9\x80\x80\x80\x80"',
 | 
				
			||||||
 | 
					    -- '"\xFC\x90\x80\x80\x80\x80"',
 | 
				
			||||||
  end)
 | 
					  end)
 | 
				
			||||||
end)
 | 
					end)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user