package ucd

import "core:strings"
import "core:os"
import "core:strconv"

/*
Parses a hexadecimal code point, or a `XXXX..YYYY` code point range.

Inputs:
- str: Either a single hexadecimal number or two hexadecimal numbers
  separated by `..`

Returns:
- cp1: The first (or only) code point
- cp2: The last code point; equal to `cp1` when `str` holds no range
- err: `.Invalid_Hex_Number` if either part fails to parse as base 16
*/
decode_rune :: proc(str: string) -> (cp1, cp2: rune, err: Error) {
	head, _, tail := strings.partition(str, "..")

	if _cp1, _ok := strconv.parse_int(head, 16); !_ok {
		return 0, 0, .Invalid_Hex_Number
	} else {
		cp1 = rune(_cp1)
	}

	// No `..` separator (or nothing after it): a single code point.
	if len(tail) == 0 {
		return cp1, cp1, nil
	}

	if _cp2, _ok := strconv.parse_int(tail, 16); !_ok {
		return 0, 0, .Invalid_Hex_Number
	} else {
		cp2 = rune(_cp2)
	}
	return
}

/*
Loads a `UnicodeData.txt` style file into a `Unicode_Data` list.

Every line is split on `;`. A pair of lines whose names end in `, First>`
and `, Last>` is merged into a single `Char_Range`; every other line
becomes a `Char`. The result should be released with `destroy_unicode_data`.

Inputs:
- filename: Path of the file to read
- allocator: Used for the returned list and the cloned names

Returns:
- unicode_data: The parsed entries. On error it holds the entries parsed
  before the failing line.
- err: An error from reading the file, `.Invalid_Hex_Number` for a bad
  code point, or `.Extra_Fields` when a line has more than 15 fields
*/
load_unicode_data :: proc(filename: string, allocator := context.allocator) -> (unicode_data: Unicode_Data, err: Error) {
	data := os.read_entire_file(filename, context.temp_allocator) or_return
	// NOTE: this releases everything held by the temp allocator, not only `data`.
	defer free_all(context.temp_allocator)

	// Bind the result to `allocator` so that `append` below grows it with the
	// same allocator that owns the cloned names.
	unicode_data = make(Unicode_Data, allocator)

	// Code point remembered from the most recent `<..., First>` line.
	first_cp: rune

	str := string(data)
	line_loop: for _line in strings.split_lines_iterator(&str) {
		// Ignore any comments
		line, _, _ := strings.partition(_line, "#")

		// Skip empty lines, including lines holding only whitespace
		if len(strings.trim_space(line)) == 0 {
			continue
		}

		is_range := false
		cp: rune
		name: string
		gc: General_Category
		num_6: string
		num_7: string
		nt := Numeric_Type.None

		field_num := 0
		for _field in strings.split_iterator(&line, ";") {
			defer field_num += 1
			field := strings.trim_space(_field)

			switch field_num {
			case 0: // Code point
				cp, _ = decode_rune(field) or_return

			case 1: // Name
				if len(field) > 9 && field[0] == '<' && strings.ends_with(field, ", First>") {
					// Start of a range; the matching `Last` line produces the entry.
					first_cp = cp
					continue line_loop
				}

				if len(field) > 9 && field[0] == '<' && strings.ends_with(field, ", Last>") {
					// Strip the leading `<` and the trailing `, Last>` (7 bytes).
					name = strings.clone(field[1:len(field)-7], allocator)
					is_range = true
				} else {
					name = strings.clone(field[:], allocator)
				}

			case 2: // General_Category
				// NOTE: This is currently ignoring a possible error; it should probably be fixed
				gc, _ = string_to_general_category(field)

			case 3: // Canonical_Combining_Class
			case 4: // Bidi Class
			case 5: // Decomposition_Type and Decomposition_Mapping

			// Numeric_Type and Numeric_Value
			case 6:
				num_6 = field

			case 7:
				num_7 = field

			case 8:
				// The numeric type follows from which of fields 6, 7 and 8 are populated.
				switch {
				case num_6 != "" && num_7 != "" && field != "":
					nt = .Decimal
				case num_6 == "" && num_7 != "" && field != "":
					nt = .Digit
				case num_6 == "" && num_7 == "" && field != "":
					nt = .Numeric
				case:
					nt = .None
				}

			case 9:  // Bidi mirrored
			case 10: // Unicode 1 Name (Obsolete as of 6.2.0)
			case 11: // should be null
			case 12:
			case 13:
			case 14:
			case:
				// `name` has not been stored in `unicode_data` yet, so nobody
				// else can release it.
				delete(name, allocator)
				err = .Extra_Fields
				return
			}
		}

		if is_range {
			append(&unicode_data, Char_Range {
				gc       = gc,
				first_cp = first_cp,
				last_cp  = cp,
				name     = name,
				nt       = nt,
			})
		} else {
			append(&unicode_data, Char {
				gc   = gc,
				cp   = cp,
				name = name,
				nt   = nt,
			})
		}
	}

	return
}

/*
This function destroys a `Unicode_Data` created by `load_unicode_data`.

Inputs:
- unicode_data: The Unicode_Data to destroy
- allocator: The allocator that was passed to `load_unicode_data`; it is
  used to release the entry names
*/
destroy_unicode_data :: proc(unicode_data: Unicode_Data, allocator := context.allocator) {
	for point in unicode_data {
		switch p in point {
		case Char:
			delete(p.name, allocator)
		case Char_Range:
			delete(p.name, allocator)
		}
	}
	delete(unicode_data)
}

/*
Collects, per `General_Category`, the ranges of code points found in `ud`.

Consecutive `Char` entries that share a category and have adjacent code
points are merged into one range; every `Char_Range` entry is added as is.

Inputs:
- ud: The Unicode data to scan
- allocator: Passed on to `append_to_dynamic_range`

Returns:
- lst: One `Dynamic_Range` per general category
*/
gc_ranges :: proc(ud: ^Unicode_Data, allocator := context.allocator) -> (lst: [General_Category]Dynamic_Range) {
	// `first == -1` marks "no range currently open".
	range := Range_Rune {
		first = -1,
		last  = -1,
	}
	gc: General_Category

	for point in ud {
		switch p in point {
		case Char:
			// Flush the open range when this code point does not extend it.
			if range.first != -1 && (p.cp != range.last + 1 || p.gc != gc) {
				append_to_dynamic_range(&lst[gc], range, allocator)
				range.first = -1
				range.last  = -1
			}

			// -1 converts to the largest u32, so `min` picks `p.cp` when no
			// range is open and keeps the existing start otherwise.
			range.first = rune(min(u32(range.first), u32(p.cp)))
			gc          = p.gc
			range.last  = p.cp

		case Char_Range:
			if range.first != -1 {
				append_to_dynamic_range(&lst[gc], range, allocator)
			}

			range.first = p.first_cp
			range.last  = p.last_cp
			append_to_dynamic_range(&lst[p.gc], range, allocator)

			range.first = -1
			range.last  = -1
		}
	}

	// Flush whatever is still open after the last entry.
	if range.first != -1 {
		append_to_dynamic_range(&lst[gc], range, allocator)
	}

	return
}

/*
Collects the code points that are not in category `Nd` but whose numeric
type is `Decimal` or `Digit`.

Inputs:
- ud: The Unicode data to scan
- allocator: Passed on to `append_to_dynamic_range`

Returns:
- The matching code points, with adjacent code points merged into ranges
*/
extra_digits :: proc(ud: ^Unicode_Data, allocator := context.allocator) -> (Dynamic_Range) {
	// `first == -1` marks "no range currently open".
	range := Range_Rune {
		first = -1,
		last  = -1,
	}

	exd: Dynamic_Range
	for point in ud {
		switch p in point {
		case Char:
			exd_type := p.gc != .Nd && (p.nt == .Decimal || p.nt == .Digit)

			// Flush the open range when this code point does not extend it.
			if range.first != -1 && (p.cp != range.last + 1 || !exd_type) {
				append_to_dynamic_range(&exd, range, allocator)
				range.first = -1
				range.last  = -1
			}

			if exd_type {
				// -1 converts to the largest u32, so `min` picks `p.cp` when
				// no range is open.
				range.first = rune(min(u32(range.first), u32(p.cp)))
				range.last  = p.cp
			}

		case Char_Range:
			exd_type := p.gc != .Nd && (p.nt == .Decimal || p.nt == .Digit)

			if range.first != -1 {
				append_to_dynamic_range(&exd, range, allocator)
			}

			if exd_type {
				range.first = p.first_cp
				range.last  = p.last_cp
				append_to_dynamic_range(&exd, range, allocator)
			}

			range.first = -1
			range.last  = -1
		}
	}

	// Flush whatever is still open after the last entry.
	if range.first != -1 {
		append_to_dynamic_range(&exd, range, allocator)
	}

	return exd
}

/*
Data contained in the Unicode file PropList.txt

A `Prop_List` is the data contained in the Unicode Database (UCD) file
`PropList.txt`. It is created with the procedure `load_property_list` and
destroyed with the procedure `destroy_property_list`.
*/
Prop_List :: [Prop_List_Property]Dynamic_Range

/*
This function destroys a `Prop_List` created by `load_property_list`.

Inputs:
- props: The Prop_List to destroy
*/
destroy_property_list :: proc(props: Prop_List) {
	for r in props {
		delete(r.ranges_16)
		delete(r.ranges_32)
		delete(r.single_16)
		delete(r.single_32)
	}
}

/*
Loads a `PropList.txt` style file into a `Prop_List`.

Every non-empty line must hold exactly two `;` separated fields: a code
point or code point range, and a property name.

Inputs:
- filename: Path of the file to read
- allocator: Used for the file contents while parsing and passed on to
  `append_to_dynamic_range`

Returns:
- props: The code point ranges of every property
- err: An error from reading the file, from `decode_rune`, from
  `string_to_proplist_property`, or `.Extra_Fields` when a line has more
  than two fields
*/
load_property_list :: proc(filename: string, allocator := context.allocator) -> (props: Prop_List, err: Error) {
	data := os.read_entire_file(filename, allocator) or_return
	// Release the buffer with the allocator it was read with.
	defer delete(data, allocator)

	str := string(data)
	for _line in strings.split_lines_iterator(&str) {
		// Ignore any comments
		line, _, _ := strings.partition(_line, "#")

		// Skip empty lines, including lines holding only whitespace
		if len(strings.trim_space(line)) == 0 {
			continue
		}

		rr: Range_Rune
		prop: Prop_List_Property

		i := 0
		for _field in strings.split_iterator(&line, ";") {
			defer i += 1
			field := strings.trim_space(_field)

			switch i {
			case 0: // Code point or code point range
				rr.first, rr.last = decode_rune(field) or_return

			case 1: // Property name
				prop = string_to_proplist_property(field) or_return

			case:
				// An empty list is returned, so release what was built so far.
				destroy_property_list(props)
				return {}, .Extra_Fields
			}
		}

		append_to_dynamic_range(&props[prop], rr, allocator)
	}

	return
}