lua: minimal UTF-16 support needed for LSP

This commit is contained in:
Björn Linse
2019-08-04 15:12:07 +02:00
parent ce628e1187
commit 1f54f68732
4 changed files with 148 additions and 15 deletions

View File

@@ -112,6 +112,65 @@ static int nlua_stricmp(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
return 1;
}
/// convert byte index to UTF-32 and UTF-16 indicies
///
/// Expects a string and an optional index. If no index is supplied, the length
/// of the string is returned.
///
/// Returns two values: the UTF-32 and UTF-16 indicies.
static int nlua_str_utfindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
{
size_t s1_len;
const char *s1 = luaL_checklstring(lstate, 1, &s1_len);
intptr_t idx;
if (lua_gettop(lstate) >= 2) {
idx = luaL_checkinteger(lstate, 2);
if (idx < 0 || idx > (intptr_t)s1_len) {
return luaL_error(lstate, "index out of range");
}
} else {
idx = (intptr_t)s1_len;
}
size_t codepoints = 0, codeunits = 0;
mb_utflen((const char_u *)s1, (size_t)idx, &codepoints, &codeunits);
lua_pushinteger(lstate, (long)codepoints);
lua_pushinteger(lstate, (long)codeunits);
return 2;
}
/// convert UTF-32 or UTF-16 indicies to byte index.
///
/// Expects up to three args: string, index and use_utf16.
/// If use_utf16 is not supplied it defaults to false (use UTF-32)
///
/// Returns the byte index.
static int nlua_str_byteindex(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
{
size_t s1_len;
const char *s1 = luaL_checklstring(lstate, 1, &s1_len);
intptr_t idx = luaL_checkinteger(lstate, 2);
if (idx < 0) {
return luaL_error(lstate, "index out of range");
}
bool use_utf16 = false;
if (lua_gettop(lstate) >= 3) {
use_utf16 = lua_toboolean(lstate, 3);
}
ssize_t byteidx = mb_utf_index_to_bytes((const char_u *)s1, s1_len,
(size_t)idx, use_utf16);
if (byteidx == -1) {
return luaL_error(lstate, "index out of range");
}
lua_pushinteger(lstate, (long)byteidx);
return 1;
}
static void nlua_luv_error_event(void **argv)
{
char *error = (char *)argv[0];
@@ -220,6 +279,12 @@ static int nlua_state_init(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
// stricmp
lua_pushcfunction(lstate, &nlua_stricmp);
lua_setfield(lstate, -2, "stricmp");
// str_utfindex
lua_pushcfunction(lstate, &nlua_str_utfindex);
lua_setfield(lstate, -2, "str_utfindex");
// str_byteindex
lua_pushcfunction(lstate, &nlua_str_byteindex);
lua_setfield(lstate, -2, "str_byteindex");
// schedule
lua_pushcfunction(lstate, &nlua_schedule);
lua_setfield(lstate, -2, "schedule");

View File

@@ -1470,6 +1470,31 @@ void mb_utflen(const char_u *s, size_t len, size_t *codepoints,
*codeunits += count + extra;
}
ssize_t mb_utf_index_to_bytes(const char_u *s, size_t len,
size_t index, bool use_utf16_units)
FUNC_ATTR_NONNULL_ALL
{
size_t count = 0;
size_t clen, i;
if (index == 0) {
return 0;
}
for (i = 0; i < len && s[i] != NUL; i += clen) {
clen = utf_ptr2len_len(s+i, len-i);
// NB: gets the byte value of invalid sequence bytes.
// we only care whether the char fits in the BMP or not
int c = (clen > 1) ? utf_ptr2char(s+i) : s[i];
count++;
if (use_utf16_units && c > 0xFFFF) {
count++;
}
if (count >= index) {
return i+clen;
}
}
return -1;
}
/*
* Version of strnicmp() that handles multi-byte characters.