mirror of
				https://github.com/neovim/neovim.git
				synced 2025-10-26 12:27:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			240 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
			
		
		
	
	
			240 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			Lua
		
	
	
	
	
	
-- This script creates the following tables in unicode_tables.generated.h:
--
-- 1. doublewidth and ambiguous tables: sorted lists of non-overlapping closed
--    intervals. Codepoints in these intervals have double (W or F) or
--    ambiguous (A) East Asian width respectively.
-- 2. combining table: same as the above, but the characters inside are
--    combining characters (i.e. have a general category equal to Mn, Mc or
--    Me).
-- 3. foldCase, toLower and toUpper tables, used to convert characters to
--    their folded/lower/upper variants. In these tables the first two values
--    are character ranges: like in the previous tables they are sorted and
--    must be non-overlapping. The third value is the step inside the range:
--    e.g. if it is 2 then the interval applies only to the first, third,
--    fifth, … character in the range. The fourth value is the number that
--    should be added to the codepoint to yield the folded/lower/upper
--    codepoint.
 | ||
-- Command-line handling. `--help` as the first argument prints the usage text
-- and exits with status 0. Otherwise the four positional arguments are taken,
-- in order, as the three input data files and the output file to generate.
if arg[1] == '--help' then
  local usage_lines = {
    'Usage:',
    '  genunicodetables.lua UnicodeData.txt CaseFolding.txt ' ..
      'EastAsianWidth.txt',
    '                       unicode_tables.generated.h',
  }
  for _, usage_line in ipairs(usage_lines) do
    print(usage_line)
  end
  os.exit(0)
end

-- Input files.
local unicodedata_fname, casefolding_fname, eastasianwidth_fname =
  arg[1], arg[2], arg[3]

-- Output file.
local utf_tables_fname = arg[4]
 | ||
| 
 | ||
-- Split string `s` into its semicolon-separated fields.
--
-- Leading and trailing whitespace is stripped from every field. Empty fields
-- are kept as '' (including the one following a trailing semicolon), so the
-- result always holds one more element than `s` has semicolons.
--
-- @param s  string to split
-- @return   list of trimmed field strings
local split_on_semicolons = function(s)
  local ret = {}
  local idx = 1
  -- `#s + 1` is a valid start position: it yields the empty field that
  -- follows a trailing semicolon (or the single empty field of '').
  while idx <= #s + 1 do
    -- Keep `item` local so that it does not leak into the global environment.
    local item = s:match('^[^;]*', idx)
    idx = idx + #item + 1
    if idx <= #s + 1 then
      -- The character that terminated the field must be the separator.
      assert(s:sub(idx - 1, idx - 1) == ';')
    end
    -- Trim both ends; only the first result of each gsub (the string) is used.
    item = item:gsub('^%s*', ''):gsub('%s*$', '')
    table.insert(ret, item)
  end
  return ret
end
 | ||
| 
 | ||
-- Read every line from the open file handle `fp` and split it on semicolons.
--
-- @param fp            handle that is read line by line with `read('*l')`
-- @param n             number of fields every processed line must contain
-- @param has_comments  when true, lines starting with '#' and lines that are
--                      empty or whitespace-only are skipped
-- @return  list holding one field list per processed line, or nil when a
--          processed line does not have exactly `n` fields (the offending
--          line is reported on stderr first)
local fp_lines_to_lists = function(fp, n, has_comments)
  local lists = {}
  -- Counts every line read, including skipped ones, for the error message.
  local lineno = 0
  local line = fp:read('*l')
  while line do
    lineno = lineno + 1
    local skip = has_comments
      and (line:sub(1, 1) == '#' or line:match('^%s*$') ~= nil)
    if not skip then
      local fields = split_on_semicolons(line)
      if #fields ~= n then
        io.stderr:write(('Found %s items in line %u, expected %u\n'):format(
          #fields, lineno, n))
        io.stderr:write('Line: ' .. line .. '\n')
        return nil
      end
      table.insert(lists, fields)
    end
    line = fp:read('*l')
  end
  return lists
end
 | ||
| 
 | ||
-- Parse the character data file from the open handle `ud_fp`. Every line is
-- processed (no comment skipping) and must contain 15 fields.
--
-- @return  list of field lists, or nil on a malformed line
local function parse_data_to_props(ud_fp)
  local field_count = 15
  return fp_lines_to_lists(ud_fp, field_count, false)
end
 | ||
| 
 | ||
-- Parse the case folding file from the open handle `cf_fp`. Comment and blank
-- lines are skipped; every remaining line must contain 4 fields.
--
-- @return  list of field lists, or nil on a malformed line
local function parse_fold_props(cf_fp)
  local field_count = 4
  return fp_lines_to_lists(cf_fp, field_count, true)
end
 | ||
| 
 | ||
-- Parse the East Asian width file from the open handle `eaw_fp`. Comment and
-- blank lines are skipped; every remaining line must contain 2 fields.
--
-- @return  list of field lists, or nil on a malformed line
local function parse_width_props(eaw_fp)
  local field_count = 2
  return fp_lines_to_lists(eaw_fp, field_count, true)
end
 | ||
| 
 | ||
-- Format one C initializer line for a table entry.
--
-- With both `step` and `add` given, a four-value entry
-- `{start, end, step, add}` is produced, where a step of 0 is written as -1.
-- Otherwise a two-value interval `{start, end}` is produced, with the
-- codepoints zero-padded to at least four hex digits.
--
-- @param start  first codepoint of the range
-- @param end_   last codepoint of the range
-- @param step   (optional) distance between affected codepoints
-- @param add    (optional) offset to the converted codepoint
-- @return       the formatted line, terminated by a newline
local make_range = function(start, end_, step, add)
  if not (step and add) then
    return string.format('  {0x%04x, 0x%04x},\n', start, end_)
  end
  local c_step = step
  if step == 0 then
    c_step = -1
  end
  return string.format('  {0x%x, 0x%x, %d, %d},\n', start, end_, c_step, add)
end
 | ||
| 
 | ||
-- Write a `convertStruct` array named `table_name` to `ut_fp`.
--
-- Entries of `props` accepted by `cond_func` are merged into ranges: an entry
-- joins the pending range when its offset (target codepoint minus source
-- codepoint) equals the range's offset and its distance from the previous
-- member equals the range's step (the second member of a range fixes the
-- step). Otherwise the pending range is written and a new one is started.
--
-- @param ut_fp       output handle providing `write`
-- @param props       list of field lists; field 1 is the hex source codepoint
-- @param cond_func   predicate selecting the entries to convert
-- @param nl_index    index of the field holding the hex target codepoint
-- @param table_name  name of the generated C array
local build_convert_table = function(ut_fp, props, cond_func, nl_index,
                                     table_name)
  ut_fp:write('static const convertStruct ' .. table_name .. '[] = {\n')

  -- Pending range; `first < 0` means that nothing has been collected yet.
  local first, last, stride, delta = -1, -1, 0, -1

  local flush = function()
    if first >= 0 then
      ut_fp:write(make_range(first, last, stride, delta))
    end
  end

  for _, p in ipairs(props) do
    if cond_func(p) then
      local n = tonumber(p[1], 16)
      local diff = tonumber(p[nl_index], 16) - n
      local extends = first >= 0 and delta == diff
        and (stride == 0 or n - last == stride)
      if extends then
        -- Continue with the same range.
        stride = n - last
      else
        -- Produce the previous range and start a new one at `n`.
        flush()
        first = n
        stride = 0
        delta = diff
      end
      last = n
    end
  end

  flush()
  ut_fp:write('};\n')
end
 | ||
| 
 | ||
-- Write the `to<table_name>` conversion table to `ut_fp`, using field `index`
-- of each `dataprops` entry as the target codepoint. Entries whose field
-- `index` is empty are left out.
local function build_case_table(ut_fp, dataprops, table_name, index)
  local has_mapping = function(p)
    return p[index] ~= ''
  end
  local c_name = 'to' .. table_name
  return build_convert_table(ut_fp, dataprops, has_mapping, index, c_name)
end
 | ||
| 
 | ||
-- Write the `foldCase` conversion table to `ut_fp`. Only `foldprops` entries
-- whose second field is 'C' or 'S' are used; the third field holds the
-- target codepoint.
local function build_fold_table(ut_fp, foldprops)
  local wanted_status = { C = true, S = true }
  local is_wanted = function(p)
    return wanted_status[p[2]] == true
  end
  return build_convert_table(ut_fp, foldprops, is_wanted, 3, 'foldCase')
end
 | ||
| 
 | ||
-- Write the `combining` interval table to `ut_fp`: runs of consecutive
-- codepoints from `dataprops` whose third field (general category) is Mn, Mc
-- or Me are merged into closed intervals.
local build_combining_table = function(ut_fp, dataprops)
  ut_fp:write('static const struct interval combining[] = {\n')

  local combining_categories = { Mn = true, Mc = true, Me = true }
  -- Pending interval; `first < 0` means that nothing has been collected yet.
  local first, last = -1, -1

  for _, p in ipairs(dataprops) do
    if combining_categories[p[3]] then
      local n = tonumber(p[1], 16)
      if first < 0 or last + 1 ~= n then
        -- Not adjacent to the pending interval: produce it and start anew.
        if first >= 0 then
          ut_fp:write(make_range(first, last))
        end
        first = n
      end
      last = n
    end
  end

  if first >= 0 then
    ut_fp:write(make_range(first, last))
  end
  ut_fp:write('};\n')
end
 | ||
| 
 | ||
-- Write an interval table named `table_name` to `ut_fp`, holding the
-- codepoints whose East Asian width class is selected by `widths`.
--
-- Adjacent entries are merged into closed intervals. A single codepoint is
-- left out when the data table lists it with general category Mn, Mc or Me;
-- ranges (`XXXX..YYYY`) are always taken in full.
--
-- @param ut_fp       output handle providing `write`
-- @param dataprops   field lists of the character data file, ordered by
--                    codepoint (field 1: hex codepoint, field 3: category)
-- @param widthprops  field lists of the width file (field 1: hex codepoint
--                    or range, field 2: starts with the width class)
-- @param widths      set of accepted width classes, keyed by the first
--                    character of field 2
-- @param table_name  name of the generated C array
local build_width_table = function(ut_fp, dataprops, widthprops, widths,
                                   table_name)
  ut_fp:write('static const struct interval ' .. table_name .. '[] = {\n')
  local combining_categories = { Mn = true, Mc = true, Me = true }
  local start = -1
  local end_ = -1
  -- Cursor into dataprops; it only moves forward, so both inputs are
  -- expected to be ordered by codepoint.
  local dataidx = 1
  for _, p in ipairs(widthprops) do
    if widths[p[2]:sub(1, 1)] then
      local rng_start, rng_end = p[1]:find('%.%.')
      local n, n_last
      if rng_start then
        -- It is a range. We don't check for composing char then.
        n = tonumber(p[1]:sub(1, rng_start - 1), 16)
        n_last = tonumber(p[1]:sub(rng_end + 1), 16)
      else
        n = tonumber(p[1], 16)
        n_last = n
      end
      -- Advance to the first data entry whose codepoint is >= n. Stop at the
      -- end of the data table instead of indexing past it; in that case `dp`
      -- and `dn` are nil and the character counts as "not found".
      local dp, dn
      while true do
        dp = dataprops[dataidx]
        if dp == nil then
          dn = nil
          break
        end
        dn = tonumber(dp[1], 16)
        if dn >= n then
          break
        end
        dataidx = dataidx + 1
      end
      if dn ~= n and n_last == n then
        io.stderr:write('Cannot find character ' .. n .. ' in data table.\n')
      end
      -- Only use the char when it's not a composing char.
      -- But use all chars from a range.
      local is_combining = dp ~= nil and combining_categories[dp[3]]
      if (n_last > n) or not is_combining then
        if start >= 0 and end_ + 1 == n then
          -- Continue with the same range.
        else
          if start >= 0 then
            ut_fp:write(make_range(start, end_))
          end
          start = n
        end
        end_ = n_last
      end
    end
  end
  if start >= 0 then
    ut_fp:write(make_range(start, end_))
  end
  ut_fp:write('};\n')
end
 | ||
| 
 | ||
-- Main driver: parse the three input files and write all tables to the
-- output file. Every io.open is wrapped in assert() so that a missing or
-- unreadable file stops the script with the system error message, and every
-- parse result is checked so that a malformed input line (already reported on
-- stderr by the parser) stops the script instead of causing an unrelated nil
-- error later on.
local ud_fp = assert(io.open(unicodedata_fname, 'r'))
local dataprops = parse_data_to_props(ud_fp)
ud_fp:close()
assert(dataprops, 'Cannot parse ' .. unicodedata_fname)

local ut_fp = assert(io.open(utf_tables_fname, 'w'))

build_case_table(ut_fp, dataprops, 'Lower', 14)
build_case_table(ut_fp, dataprops, 'Upper', 13)
build_combining_table(ut_fp, dataprops)

local cf_fp = assert(io.open(casefolding_fname, 'r'))
local foldprops = parse_fold_props(cf_fp)
cf_fp:close()
assert(foldprops, 'Cannot parse ' .. casefolding_fname)

build_fold_table(ut_fp, foldprops)

local eaw_fp = assert(io.open(eastasianwidth_fname, 'r'))
local widthprops = parse_width_props(eaw_fp)
eaw_fp:close()
assert(widthprops, 'Cannot parse ' .. eastasianwidth_fname)

build_width_table(ut_fp, dataprops, widthprops, {W=true, F=true}, 'doublewidth')
build_width_table(ut_fp, dataprops, widthprops, {A=true}, 'ambiguous')

ut_fp:close()
 | 
