refactor: replace utf_convert with utf8proc conversion functions

This commit is contained in:
dundargoc
2024-06-12 21:14:03 +02:00
committed by dundargoc
parent 32e16cb0b6
commit 496091b632
2 changed files with 6 additions and 16 deletions

View File

@@ -5,13 +5,13 @@
-- (A) east asian width respectively. -- (A) east asian width respectively.
-- 2. combining table: same as the above, but characters inside are combining -- 2. combining table: same as the above, but characters inside are combining
-- characters (i.e. have general categories equal to Mn, Mc or Me). -- characters (i.e. have general categories equal to Mn, Mc or Me).
-- 3. foldCase, toLower and toUpper tables used to convert characters to -- 3. foldCase table used to convert characters to
-- folded/lower/upper variants. In these tables first two values are -- folded variants. In this table first two values are
-- character ranges: like in previous tables they are sorted and must be -- character ranges: like in previous tables they are sorted and must be
-- non-overlapping. Third value means step inside the range: e.g. if it is -- non-overlapping. Third value means step inside the range: e.g. if it is
-- 2 then interval applies only to first, third, fifth, … character in range. -- 2 then interval applies only to first, third, fifth, … character in range.
-- Fourth value is number that should be added to the codepoint to yield -- Fourth value is number that should be added to the codepoint to yield
-- folded/lower/upper codepoint. -- folded codepoint.
-- 4. emoji_wide and emoji_all tables: sorted lists of non-overlapping closed -- 4. emoji_wide and emoji_all tables: sorted lists of non-overlapping closed
-- intervals of Emoji characters. emoji_wide contains all the characters -- intervals of Emoji characters. emoji_wide contains all the characters
-- which don't have ambiguous or double width, and emoji_all has all Emojis. -- which don't have ambiguous or double width, and emoji_all has all Emojis.
@@ -129,13 +129,6 @@ local build_convert_table = function(ut_fp, props, cond_func, nl_index, table_na
ut_fp:write('};\n') ut_fp:write('};\n')
end end
-- Emits one case-conversion table by delegating to build_convert_table.
-- The generated table is named 'to' .. table_name; the call sites visible in
-- this diff pass 'Lower' with index 14 and 'Upper' with index 13.
--
-- ut_fp, dataprops: forwarded unchanged to build_convert_table.
-- table_name: suffix appended to 'to' to form the table name.
-- index: position inside each props entry that is tested for being non-empty;
--        the same value is also forwarded as build_convert_table's nl_index.
-- Returns whatever build_convert_table returns.
local build_case_table = function(ut_fp, dataprops, table_name, index)
-- Predicate: true when entry p holds a non-empty string at position `index`.
-- NOTE(review): presumably build_convert_table uses it to skip characters
-- that have no mapping; confirm against its definition.
local cond_func = function(p)
return p[index] ~= ''
end
return build_convert_table(ut_fp, dataprops, cond_func, index, 'to' .. table_name)
end
local build_fold_table = function(ut_fp, foldprops) local build_fold_table = function(ut_fp, foldprops)
local cond_func = function(p) local cond_func = function(p)
return (p[2] == 'C' or p[2] == 'S') return (p[2] == 'C' or p[2] == 'S')
@@ -296,8 +289,6 @@ ud_fp:close()
local ut_fp = io.open(utf_tables_fname, 'w') local ut_fp = io.open(utf_tables_fname, 'w')
build_case_table(ut_fp, dataprops, 'Lower', 14)
build_case_table(ut_fp, dataprops, 'Upper', 13)
build_combining_table(ut_fp, dataprops) build_combining_table(ut_fp, dataprops)
local cf_fp = io.open(casefolding_fname, 'r') local cf_fp = io.open(casefolding_fname, 'r')

View File

@@ -32,6 +32,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <utf8proc.h>
#include <uv.h> #include <uv.h>
#include <wctype.h> #include <wctype.h>
@@ -1346,8 +1347,7 @@ int mb_toupper(int a)
return TOUPPER_LOC(a); return TOUPPER_LOC(a);
} }
// For any other characters use the above mapping table. return utf8proc_toupper(a);
return utf_convert(a, toUpper, ARRAY_SIZE(toUpper));
} }
bool mb_islower(int a) bool mb_islower(int a)
@@ -1374,8 +1374,7 @@ int mb_tolower(int a)
return TOLOWER_LOC(a); return TOLOWER_LOC(a);
} }
// For any other characters use the above mapping table. return utf8proc_tolower(a);
return utf_convert(a, toLower, ARRAY_SIZE(toLower));
} }
bool mb_isupper(int a) bool mb_isupper(int a)