Added program to generate Unicode Table

This adds a program that generates the tables used by the `core/unicode` package. The generated table file will be `core/unicode/generated.odin`. It may be better to incorporate this into `generate_entity_table.odin`; this can easily be accomplished if desired.
2026-06-15 14:53:43 +00:00 · 2026-03-07 17:34:37 -05:00
parent c4f5f9e55a
commit 8f579d1f3b
5 changed files with 1800 additions and 0 deletions
--- a/core/unicode/tools/generate_unicode.odin
+++ b/core/unicode/tools/generate_unicode.odin
@@ -0,0 +1,325 @@
+package main
+import "core:fmt"
+import path "core:path/filepath"
+import "core:os"
+import "core:strings"
+import "base:runtime"
+import "core:mem"
+import "core:io"
+import "core:log"
+import "ucd"
+
+// Table 2-3. Types of Code Points
+// Table 4-4. General_Category Values page 229
+
+// Reference https://www.unicode.org/reports/tr44/
+
+
+/*
+Writes the contents of a ucd.Dynamic_Range to an io.Writer as up to four
+`@(rodata)` fixed-length array declarations. The value of `name` is used as the
+prefix of every array name (`<name>_singles16`, `<name>_ranges16`,
+`<name>_singles32`, `<name>_ranges32`). A dynamic array in `range` that is empty
+produces no output at all.
+
+Inputs:
+- writer: The io.Writer to be written to.
+- name: Prefix for the name of every array written to `writer`.
+- range: The ucd.Dynamic_Range to format and write.
+
+Returns:
+- The sum of the counts reported by the individual write calls.
+*/
+write_range_arrays :: proc(
+	writer: io.Writer,
+	name: string,
+	range: ucd.Dynamic_Range,
+) -> int {
+	total := 0
+
+	if len(range.single_16) > 0 {
+		total += fmt.wprintln(writer, "@(rodata)")
+		total += fmt.wprintf(writer, "%s_singles16 := [?]u16{{", name)
+		// Start beyond the column limit so the very first value opens a new line.
+		column := 100
+		for value in range.single_16 {
+			scratch: [32]byte
+			entry := fmt.bprintf(scratch[:], " 0x%4X,", value)
+
+			if column + len(entry) <= 80 {
+				// Still room on the current line: append the value as formatted.
+				written, _ := io.write_string(writer, entry)
+				column += written
+				total += written
+				continue
+			}
+
+			// Line is full: break it and restart the column count with this value.
+			total += fmt.wprintf(writer, "\n")
+			column = fmt.wprintf(writer, "\t0x%4X,", value)
+			total += column
+		}
+		total += fmt.wprintln(writer, "\n}\n")
+	}
+
+	if len(range.ranges_16) > 0 {
+		total += fmt.wprintln(writer, "@(rodata)")
+		total += fmt.wprintfln(writer, "%s_ranges16 := [?]u16{{", name)
+		// One first/last pair per line.
+		for pair in range.ranges_16 {
+			total += fmt.wprintfln(writer, "\t0x%4X, 0x%4X,", pair.first, pair.last)
+		}
+		total += fmt.wprintln(writer, "}\n")
+	}
+
+	if len(range.single_32) > 0 {
+		total += fmt.wprintln(writer, "@(rodata)")
+		total += fmt.wprintf(writer, "%s_singles32 := [?]i32{{", name)
+		// Start beyond the column limit so the very first value opens a new line.
+		column := 100
+		for value in range.single_32 {
+			scratch: [32]byte
+			entry := fmt.bprintf(scratch[:], " 0x%4X,", value)
+
+			if column + len(entry) <= 80 {
+				written, _ := io.write_string(writer, entry)
+				column += written
+				total += written
+				continue
+			}
+
+			total += fmt.wprint(writer, "\n")
+			column = fmt.wprintf(writer, "\t0x%4X,", value)
+			total += column
+		}
+		total += fmt.wprintln(writer, "\n}\n")
+	}
+
+	if len(range.ranges_32) > 0 {
+		total += fmt.wprintln(writer, "@(rodata)")
+		total += fmt.wprintfln(writer, "%s_ranges32 := [?]i32{{", name)
+		// One first/last pair per line.
+		for pair in range.ranges_32 {
+			total += fmt.wprintfln(writer, "\t0x%4X, 0x%4X,", pair.first, pair.last)
+		}
+		total += fmt.wprintln(writer, "}\n")
+	}
+
+	return total
+}
+
+/*
+Writes one complete table for `range`: first the backing arrays (via
+`write_range_arrays`) and then a `<prefix>_ranges := Range{...}` value whose
+fields slice those arrays. Fields whose source array is empty are left out.
+
+The prefix is `name` with ASCII upper-case letters converted to lower case.
+`name` is either a literal string (at most 128 bytes, enforced by an assert) or
+a ucd.General_Category, which is formatted with `%s`.
+
+Inputs:
+- writer: The io.Writer to be written to.
+- name: Source of the identifier prefix.
+- range: The ucd.Dynamic_Range to format and write.
+
+Returns:
+- n_written: The sum of the counts reported by the individual write calls.
+*/
+write_range :: proc(
+	writer: io.Writer,
+	name: union{string, ucd.General_Category},
+	range: ucd.Dynamic_Range,
+) -> (n_written: int) {
+	scratch: [128]byte
+	prefix: string
+
+	// Materialise the prefix inside `scratch` so it can be lower-cased in place.
+	switch value in name {
+	case ucd.General_Category:
+		prefix = fmt.bprintf(scratch[:], "%s", value)
+
+	case string:
+		assert(len(value) <= len(scratch))
+		runtime.mem_copy(&scratch[0], raw_data(value), len(value))
+		prefix = transmute(string) scratch[0:len(value)]
+	}
+
+	// ASCII-only lower-casing; `prefix` aliases `scratch`, so it sees the change.
+	for i in 0..<len(prefix) {
+		c := scratch[i]
+		if 'A' <= c && c <= 'Z' {
+			scratch[i] = c + ('a' - 'A')
+		}
+	}
+
+	n_written = write_range_arrays(writer, prefix, range)
+
+	has_single_16 := len(range.single_16) > 0
+	has_ranges_16 := len(range.ranges_16) > 0
+	has_single_32 := len(range.single_32) > 0
+	has_ranges_32 := len(range.ranges_32) > 0
+
+	n_written += fmt.wprintfln(writer, "%s_ranges := Range{{", prefix)
+	if has_single_16 {
+		n_written += fmt.wprintfln(writer, "\tsingle_16 = %s_singles16[:],", prefix)
+	}
+	if has_ranges_16 {
+		n_written += fmt.wprintfln(writer, "\tranges_16 = %s_ranges16[:],", prefix)
+	}
+	if has_single_32 {
+		n_written += fmt.wprintfln(writer, "\tsingle_32 = %s_singles32[:],", prefix)
+	}
+	if has_ranges_32 {
+		n_written += fmt.wprintfln(writer, "\tranges_32 = %s_ranges32[:],", prefix)
+	}
+	n_written += fmt.wprintln(writer, "}\n")
+
+	return
+}
+
+GENERATED :: `/*
+	------ GENERATED ------ DO NOT EDIT ------ GENERATED ------ DO NOT EDIT ------ GENERATED ------
+*/
+`
+
+MESSAGE :: `/* 
+	This file is generated from UnicodeData.txt and PropList.txt. These files
+	are part of the Unicode Database (UCD) and are covered by the license
+	listed further down. They may be downloaded from the following locations;
+
+	https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+	https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
+	https://www.unicode.org/license.txt
+
+	------------------------------------------------------------------------------
+	UNICODE LICENSE V3
+	
+	COPYRIGHT AND PERMISSION NOTICE
+	
+	Copyright © 1991-2026 Unicode, Inc.
+	
+	NOTICE TO USER: Carefully read the following legal agreement. BY
+	DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
+	SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
+	TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
+	DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
+	
+	Permission is hereby granted, free of charge, to any person obtaining a
+	copy of data files and any associated documentation (the "Data Files") or
+	software and any associated documentation (the "Software") to deal in the
+	Data Files or Software without restriction, including without limitation
+	the rights to use, copy, modify, merge, publish, distribute, and/or sell
+	copies of the Data Files or Software, and to permit persons to whom the
+	Data Files or Software are furnished to do so, provided that either (a)
+	this copyright and permission notice appear with all copies of the Data
+	Files or Software, or (b) this copyright and permission notice appear in
+	associated Documentation.
+	
+	THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+	KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
+	THIRD PARTY RIGHTS.
+	
+	IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
+	BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
+	OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+	WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+	ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
+	FILES OR SOFTWARE.
+	
+	Except as contained in this notice, the name of a copyright holder shall
+	not be used in advertising or otherwise to promote the sale, use or other
+	dealings in these Data Files or Software without prior written
+	authorization of the copyright holder.
+
+*/
+`
+
+/*
+Generates `ODIN_ROOT/core/unicode/generated.odin`.
+
+Loads `UnicodeData.txt` and `PropList.txt` from
+`ODIN_ROOT/tests/core/assets/UCD`, renders the selected tables into an
+in-memory string builder and writes the result in one go. If either input
+fails to load the error is logged and the run stops before the output file is
+touched, so an existing `generated.odin` is never replaced by tables built
+from missing data.
+
+All allocations go through a tracking allocator; anything still allocated at
+exit is reported on stderr.
+*/
+main :: proc() {
+	track: mem.Tracking_Allocator
+
+	mem.tracking_allocator_init(&track, context.allocator)
+	defer {
+		if len(track.allocation_map) > 0 {
+			fmt.eprintf("=== %v allocations not freed: ===\n", len(track.allocation_map))
+			for _, entry in track.allocation_map {
+				fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
+			}
+		}
+		mem.tracking_allocator_destroy(&track)
+	}
+
+	context.allocator = mem.tracking_allocator(&track)
+
+	context.logger = log.create_console_logger()
+	defer log.destroy_console_logger(context.logger)
+
+	ucd_path, _ := path.join({ODIN_ROOT,
+		"tests","core","assets","UCD","UnicodeData.txt"}, context.allocator)
+	defer delete(ucd_path)
+
+	unicode_data, ucd_err := ucd.load_unicode_data(ucd_path)
+	// Registered before the error check so whatever the loader returned is
+	// still released when bailing out below.
+	defer ucd.destroy_unicode_data(unicode_data)
+	if ucd_err != nil {
+		log.errorf("Error loading Unicode data. %s", ucd_err)
+		// Do not go on to overwrite generated.odin with tables built from nothing.
+		return
+	}
+
+	general_category_ranges := ucd.gc_ranges(&unicode_data)
+	defer ucd.destroy_general_category_ranges(general_category_ranges)
+
+	extra_digits := ucd.extra_digits(&unicode_data)
+	defer ucd.destroy_dynamic_range(extra_digits)
+
+	proplist_path, _ := path.join({ODIN_ROOT,
+		"tests","core","assets","UCD","PropList.txt"}, context.allocator)
+	defer delete(proplist_path)
+
+	proplist, proplist_err := ucd.load_protperty_list(proplist_path)
+	if proplist_err != nil {
+		log.errorf("Error loading PropList.txt. %s", proplist_err)
+		return
+	}
+	defer ucd.destroy_protperty_list(proplist)
+
+	// The whole file is assembled in memory and written once at the end.
+	sb := strings.builder_make_len_cap(0, 1024*32)
+	defer strings.builder_destroy(&sb)
+
+	writer := strings.to_writer(&sb)
+
+	fmt.wprintfln(writer, "package unicode\n")
+	fmt.wprintln(writer, GENERATED)
+	fmt.wprintln(writer, MESSAGE)
+
+	// Declaration of the `Range` type that every generated `*_ranges` value uses.
+	Range_Type :: "Range :: struct {\n" +
+		"\tsingle_16 : []u16,\n" +
+		"\tranges_16 : []u16,\n" +
+		"\tsingle_32 : []i32,\n" +
+		"\tranges_32 : []i32,\n" +
+		"}\n"
+
+	fmt.wprintfln(writer, "%s", Range_Type)
+
+	// General categories to skip when generating the code for
+	// core/unicode/generated.odin. Commented-out entries are the ones that
+	// ARE generated.
+	to_exclude := bit_set[ucd.General_Category]{
+		.Cc, // Control, a C0 or C1 control code
+		.Cf, // Format, a format control character
+		.Cn, // Unassigned, a reserved unassigned code point or a noncharacter
+		.Co, // Private_Use, a private-use character
+		.Cs, // Surrogate, a surrogate code point
+		// .Ll, // Lowercase_Letter, a lowercase letter
+		// .Lm, // Modifier_Letter, a modifier letter
+		// .Lo, // Other_Letter, other letters, including syllables and ideographs
+		// .Lt, // Titlecase_Letter, a digraph encoded as a single character, with first part uppercase
+		// .Lu, // Uppercase_Letter, an uppercase letter
+		.Mc, // Spacing_Mark, a spacing combining mark (positive advance width)
+		.Me, // Enclosing_Mark, an enclosing combining mark
+		.Mn, // Nonspacing_Mark, a nonspacing combining mark (zero advance width)
+		// .Nd, // Decimal_Number, a decimal digit
+		// .Nl, // Letter_Number, a letterlike numeric character
+		// .No, // Other_Number, a numeric character of other type
+		.Pc, // Connector_Punctuation, a connecting punctuation mark, like a tie
+		.Pd, // Dash_Punctuation, a dash or hyphen punctuation mark
+		.Pe, // Close_Punctuation, a closing punctuation mark (of a pair)
+		.Pf, // Final_Punctuation, a final quotation mark
+		.Pi, // Initial_Punctuation, an initial quotation mark
+		.Po, // Other_Punctuation, a punctuation mark of other type
+		.Ps, // Open_Punctuation, an opening punctuation mark (of a pair)
+		.Sc, // Currency_Symbol, a currency sign
+		.Sk, // Modifier_Symbol, a non-letterlike modifier symbol
+		.Sm, // Math_Symbol, a symbol of mathematical use
+		.So, // Other_Symbol, a symbol of other type
+		.Zl, // Line_Separator, U+2028 LINE SEPARATOR only
+		.Zp, // Paragraph_Separator, U+2029 PARAGRAPH SEPARATOR only
+		.Zs, // Space_Separator, a space character (of various non-zero widths)
+	}
+
+	// One table per general category that is not excluded.
+	for ranges, category in general_category_ranges {
+		if category in to_exclude do continue
+		write_range(writer, category, ranges)
+	}
+
+	write_range(writer, "extra_digits", extra_digits)
+
+	write_range(writer, "other_lowercase", proplist[.Other_Lowercase])
+	write_range(writer, "other_uppercase", proplist[.Other_Uppercase])
+
+	file_name, _ := path.join({ODIN_ROOT, "core", "unicode", "generated.odin"}, context.allocator)
+	defer delete(file_name)
+
+	str := strings.to_string(sb)
+
+	write_error := os.write_entire_file_from_string(file_name, str)
+	if write_error != nil {
+		log.errorf("Error writting %s. %s", file_name, write_error)
+	}
+}
+
--- a/core/unicode/tools/ucd/iterator.odin
+++ b/core/unicode/tools/ucd/iterator.odin
@@ -0,0 +1,70 @@
+package ucd
+
+/*
+State for iterating over the lines of a []byte without allocating.
+
+Lines are separated by '\n'. A '#' starts a comment: the returned line stops
+just before it and the rest of the physical line is skipped. The last line
+does not need a trailing '\n'.
+*/
+Line_Iterator :: struct {
+	index: int, // current location in data
+	data: []byte, // data over which to iterate
+	line_counter: int, // number of lines returned so far
+}
+
+/*
+Returns the next line of `it.data` as a sub-slice (no copy), its 1-based line
+number, and `more = true`. Once the data is exhausted it returns
+`more = false` and leaves the iterator untouched.
+*/
+line_iterator :: proc(it: ^Line_Iterator) -> (line: []byte, line_number: int,  more: bool) {
+	end := len(it.data)
+	if it.index >= end do return
+
+	more = true
+	it.line_counter += 1
+	line_number = it.line_counter
+
+	// Content runs up to the first newline or comment marker.
+	begin := it.index
+	cursor := begin
+	for cursor < end {
+		c := it.data[cursor]
+		if c == '\n' || c == '#' do break
+		cursor += 1
+	}
+	line = it.data[begin:cursor]
+
+	// Drop a trailing comment (no-op when already at '\n' or at the end) ...
+	for cursor < end && it.data[cursor] != '\n' do cursor += 1
+	// ... and step over the newline itself, if there is one.
+	if cursor < end do cursor += 1
+
+	it.index = cursor
+	return
+}
+
+/*
+State for iterating over the ';'-separated fields of one line without
+allocating.
+*/
+Field_Iterator :: struct {
+	index: int, // current location in line
+	field_counter: int, // number of fields returned so far
+	line: []byte, // line being split
+}
+
+/*
+Returns the next field of `it.line` as a sub-slice (no copy) with leading and
+trailing spaces removed, together with its 0-based position in the line.
+
+Iteration stops (`valid = false`) at the end of the line, at a '\n' or at a
+'#'. Empty fields are returned as empty slices, including an empty first field
+(line starting with ';'), an empty last field (line ending with ';') and a
+field consisting only of spaces.
+*/
+field_iterator :: proc(it: ^Field_Iterator) -> (field: []byte, field_count: int,  valid: bool) {
+	valid = it.index < len(it.line) && it.line[it.index] != '\n' && it.line[it.index] != '#'
+	if !valid do return
+
+	// Step over the separator the previous call stopped on. "Previous call" is
+	// detected through field_counter as well as index: when the first field is
+	// empty the index is still 0 after it has been returned, and testing the
+	// index alone would hand out that same empty field forever.
+	if (it.index != 0 || it.field_counter != 0) && it.line[it.index] == ';' do it.index += 1
+
+	start := it.index
+	for it.index < len(it.line) && it.line[it.index] != ';' && it.line[it.index] != '#' do it.index += 1
+
+	// Trim spaces from both ends; an all-space field collapses to empty.
+	lo, hi := start, it.index
+	for lo < hi && it.line[lo] == ' ' do lo += 1
+	for hi > lo && it.line[hi-1] == ' ' do hi -= 1
+	field = it.line[lo:hi]
+
+	field_count = it.field_counter
+	it.field_counter += 1
+	return
+}
--- a/core/unicode/tools/ucd/string_to.odin
+++ b/core/unicode/tools/ucd/string_to.odin
@@ -0,0 +1,396 @@
+package ucd
+
+/*
+Maps a two-letter General_Category abbreviation (for example "Lu") to the
+matching General_Category value. The match is exact and case-sensitive.
+
+Returns `UCD_Error.Invalid_General_Category` for any other string; `gc` is
+then left at its zero value.
+*/
+string_to_general_category :: proc "contextless"(
+	str: string,
+) -> (gc: General_Category, err: Error) {
+	switch str {
+	// Other
+	case "Cc": gc = .Cc
+	case "Cf": gc = .Cf
+	case "Cn": gc = .Cn
+	case "Co": gc = .Co
+	case "Cs": gc = .Cs
+	// Letter
+	case "Ll": gc = .Ll
+	case "Lm": gc = .Lm
+	case "Lo": gc = .Lo
+	case "Lt": gc = .Lt
+	case "Lu": gc = .Lu
+	// Mark
+	case "Mc": gc = .Mc
+	case "Me": gc = .Me
+	case "Mn": gc = .Mn
+	// Number
+	case "Nd": gc = .Nd
+	case "Nl": gc = .Nl
+	case "No": gc = .No
+	// Punctuation
+	case "Pc": gc = .Pc
+	case "Pd": gc = .Pd
+	case "Pe": gc = .Pe
+	case "Pf": gc = .Pf
+	case "Pi": gc = .Pi
+	case "Po": gc = .Po
+	case "Ps": gc = .Ps
+	// Symbol
+	case "Sc": gc = .Sc
+	case "Sk": gc = .Sk
+	case "Sm": gc = .Sm
+	case "So": gc = .So
+	// Separator
+	case "Zl": gc = .Zl
+	case "Zp": gc = .Zp
+	case "Zs": gc = .Zs
+	case:
+		err = UCD_Error.Invalid_General_Category
+	}
+	return
+}
+
+
+/*
+Maps a Unicode version string such as "5.2", or the literal "unassigned", to
+the matching Age value. The match is exact.
+
+Returns `UCD_Error.UNEXPECTED_STRING` for any other string, with `age` left at
+`.Nil`. Input comes from data files, so an unknown value (for instance a
+version newer than this table) is reported as an error, not treated as
+unreachable.
+*/
+string_to_age :: proc "contextless" (
+	str: string,
+) -> (age: Age, err: Error) {
+	switch str {
+	case "1.1":  age = .Age_1_1
+	case "2.0":  age = .Age_2_0
+	case "2.1":  age = .Age_2_1
+	case "3.0":  age = .Age_3_0
+	case "3.1":  age = .Age_3_1
+	case "3.2":  age = .Age_3_2
+	case "4.0":  age = .Age_4_0
+	case "4.1":  age = .Age_4_1
+	case "5.0":  age = .Age_5_0
+	case "5.1":  age = .Age_5_1
+	case "5.2":  age = .Age_5_2
+	case "6.0":  age = .Age_6_0
+	case "6.1":  age = .Age_6_1
+	case "6.2":  age = .Age_6_2
+	case "6.3":  age = .Age_6_3
+	case "7.0":  age = .Age_7_0
+	case "8.0":  age = .Age_8_0
+	case "9.0":  age = .Age_9_0
+	case "10.0": age = .Age_10_0
+	case "11.0": age = .Age_11_0
+	case "12.0": age = .Age_12_0
+	case "12.1": age = .Age_12_1
+	case "13.0": age = .Age_13_0
+	case "14.0": age = .Age_14_0
+	case "15.0": age = .Age_15_0
+	case "15.1": age = .Age_15_1
+	case "16.0": age = .Age_16_0
+	case "17.0": age = .Age_17_0
+	case "unassigned":
+		age = .Age_Unassigned
+	case:
+		err = UCD_Error.UNEXPECTED_STRING
+	}
+	return
+}
+
+
+/*
+Maps a Bidi_Paired_Bracket_Type code to its enum value:
+"o" -> .Open, "c" -> .Close, "n" -> .None.
+
+Any other input reaches `unreachable()`.
+*/
+string_to_paired_bracket_type :: proc "contextless"(str: string) -> Paired_Brack_Type {
+	switch str {
+	case "o": return .Open
+	case "c": return .Close
+	case "n": return .None
+	}
+	// TODO: Add error for this
+	unreachable()
+}
+
+/*
+Maps a Bidi_Class abbreviation (for example "AL" or "NSM") to the matching
+Bidi_Class value. The match is exact and case-sensitive.
+
+Any other input reaches `unreachable()`.
+*/
+string_to_bidi_class :: proc "contextless"(str: string) -> Bidi_Class {
+	switch str {
+	case "L":   return .L
+	case "R":   return .R
+	case "AL":  return .AL
+	case "EN":  return .EN
+	case "ES":  return .ES
+	case "ET":  return .ET
+	case "AN":  return .AN
+	case "CS":  return .CS
+	case "NSM": return .NSM
+	case "BN":  return .BN
+	case "B":   return .B
+	case "S":   return .S
+	case "WS":  return .WS
+	case "ON":  return .ON
+	case "LRE": return .LRE
+	case "LRO": return .LRO
+	case "RLE": return .RLE
+	case "RLO": return .RLO
+	case "PDF": return .PDF
+	case "LRI": return .LRI
+	case "RLI": return .RLI
+	case "FSI": return .FSI
+	case "PDI": return .PDI
+	case:
+		// TODO: Add error for this
+		unreachable()
+	}
+}
+
+/*
+Maps a property name as spelled in PropList.txt (for example "White_Space")
+to the matching PropList_Property value. The match is exact and
+case-sensitive.
+
+Returns `.Unknown_Property` in `err` for any other string; `prop` is then left
+at its zero value.
+*/
+string_to_proplist_property :: proc(str: string) -> (
+	prop: PropList_Property,
+	err: UCD_Error,
+) {
+	switch str {
+	case "White_Space":                        prop = .White_Space
+	case "Bidi_Control":                       prop = .Bidi_Control
+	case "Join_Control":                       prop = .Join_Control
+	case "Dash":                               prop = .Dash
+	case "Hyphen":                             prop = .Hyphen
+	case "Quotation_Mark":                     prop = .Quotation_Mark
+	case "Terminal_Punctuation":               prop = .Terminal_Punctuation
+	case "Other_Math":                         prop = .Other_Math
+	case "Hex_Digit":                          prop = .Hex_Digit
+	case "ASCII_Hex_Digit":                    prop = .ASCII_Hex_Digit
+	case "Other_Alphabetic":                   prop = .Other_Alphabetic
+	case "Ideographic":                        prop = .Ideographic
+	case "Diacritic":                          prop = .Diacritic
+	case "Extender":                           prop = .Extender
+	case "Other_Lowercase":                    prop = .Other_Lowercase
+	case "Other_Uppercase":                    prop = .Other_Uppercase
+	case "Noncharacter_Code_Point":            prop = .Noncharacter_Code_Point
+	case "Other_Grapheme_Extend":              prop = .Other_Grapheme_Extend
+	case "IDS_Binary_Operator":                prop = .IDS_Binary_Operator
+	case "IDS_Trinary_Operator":               prop = .IDS_Trinary_Operator
+	case "IDS_Unary_Operator":                 prop = .IDS_Unary_Operator
+	case "Radical":                            prop = .Radical
+	case "Unified_Ideograph":                  prop = .Unified_Ideograph
+	case "Other_Default_Ignorable_Code_Point": prop = .Other_Default_Ignorable_Code_Point
+	case "Deprecated":                         prop = .Deprecated
+	case "Soft_Dotted":                        prop = .Soft_Dotted
+	case "Logical_Order_Exception":            prop = .Logical_Order_Exception
+	case "Other_ID_Start":                     prop = .Other_ID_Start
+	case "Other_ID_Continue":                  prop = .Other_ID_Continue
+	case "ID_Compat_Math_Continue":            prop = .ID_Compat_Math_Continue
+	case "ID_Compat_Math_Start":               prop = .ID_Compat_Math_Start
+	case "Sentence_Terminal":                  prop = .Sentence_Terminal
+	case "Variation_Selector":                 prop = .Variation_Selector
+	case "Pattern_White_Space":                prop = .Pattern_White_Space
+	case "Pattern_Syntax":                     prop = .Pattern_Syntax
+	case "Prepended_Concatenation_Mark":       prop = .Prepended_Concatenation_Mark
+	case "Regional_Indicator":                 prop = .Regional_Indicator
+	case "Modifier_Combining_Mark":            prop = .Modifier_Combining_Mark
+	case:
+		err = .Unknown_Property
+	}
+	return
+}
--- a/core/unicode/tools/ucd/types.odin
+++ b/core/unicode/tools/ucd/types.odin
@@ -0,0 +1,702 @@
+package ucd
+
+import "core:os"
+
+Age :: enum byte {
+	Nil = 0,
+	Age_1_1,
+	Age_2_0,
+	Age_2_1,
+	Age_3_0,
+	Age_3_1,
+	Age_3_2,
+	Age_4_0,
+	Age_4_1,
+	Age_5_0,
+	Age_5_1,
+	Age_5_2,
+	Age_6_0,
+	Age_6_1,
+	Age_6_2,
+	Age_6_3,
+	Age_7_0,
+	Age_8_0,
+	Age_9_0,
+	Age_10_0,
+	Age_11_0,
+	Age_12_0,
+	Age_12_1,
+	Age_13_0,
+	Age_14_0,
+	Age_15_0,
+	Age_15_1,
+	Age_16_0,
+	Age_17_0,
+	Age_Unassigned,
+}
+
+General_Category :: enum {
+	Cc, // Control, a C0 or C1 control code
+	Cf, // Format, a format control character
+	Cn, // Unassigned, a reserved unassigned code point or a noncharacter
+	Co, // Private_Use, a private-use character
+	Cs, // Surrogate, a surrogate code point
+	Ll, // Lowercase_Letter, a lowercase letter
+	Lm, // Modifier_Letter, a modifier letter
+	Lo, // Other_Letter, other letters, including syllables and ideographs
+	Lt, // Titlecase_Letter, a digraph encoded as a single character, with first part uppercase
+	Lu, // Uppercase_Letter, an uppercase letter
+	Mc, // Spacing_Mark, a spacing combining mark (positive advance width)
+	Me, // Enclosing_Mark, an enclosing combining mark
+	Mn, // Nonspacing_Mark, a nonspacing combining mark (zero advance width)
+	Nd, // Decimal_Number, a decimal digit
+	Nl, // Letter_Number, a letterlike numeric character
+	No, // Other_Number, a numeric character of other type
+	Pc, // Connector_Punctuation, a connecting punctuation mark, like a tie
+	Pd, // Dash_Punctuation, a dash or hyphen punctuation mark
+	Pe, // Close_Punctuation, a closing punctuation mark (of a pair)
+	Pf, // Final_Punctuation, a final quotation mark
+	Pi, // Initial_Punctuation, an initial quotation mark
+	Po, // Other_Punctuation, a punctuation mark of other type
+	Ps, // Open_Punctuation, an opening punctuation mark (of a pair)
+	Sc, // Currency_Symbol, a currency sign
+	Sk, // Modifier_Symbol, a non-letterlike modifier symbol
+	Sm, // Math_Symbol, a symbol of mathematical use
+	So, // Other_Symbol, a symbol of other type
+	Zl, // Line_Separator, U+2028 LINE SEPARATOR only
+	Zp, // Paragraph_Separator, U+2029 PARAGRAPH SEPARATOR only
+	Zs, // Space_Separator, a space character (of various non-zero widths)
+}
+
+Block :: enum {
+	Nil = 0,
+	Adlam,
+	Aegean_Numbers,
+	Ahom,
+	Alchemical,
+	Alphabetic_PF,
+	Anatolian_Hieroglyphs,
+	Ancient_Greek_Music,
+	Ancient_Greek_Numbers,
+	Ancient_Symbols,
+	Arabic,
+	Arabic_Ext_A,
+	Arabic_Ext_B,
+	Arabic_Ext_C,
+	Arabic_Math,
+	Arabic_PF_A,
+	Arabic_PF_B,
+	Arabic_Sup,
+	Armenian,
+	Arrows,
+	ASCII,
+	Avestan,
+	Balinese,
+	Bamum,
+	Bamum_Sup,
+	Bassa_Vah,
+	Batak,
+	Bengali,
+	Beria_Erfe,
+	Bhaiksuki,
+	Block_Elements,
+	Bopomofo,
+	Bopomofo_Ext,
+	Box_Drawing,
+	Brahmi,
+	Braille,
+	Buginese,
+	Buhid,
+	Byzantine_Music,
+	Carian,
+	Caucasian_Albanian,
+	Chakma,
+	Cham,
+	Cherokee,
+	Cherokee_Sup,
+	Chess_Symbols,
+	Chorasmian,
+	CJK,
+	CJK_Compat,
+	CJK_Compat_Forms,
+	CJK_Compat_Ideographs,
+	CJK_Compat_Ideographs_Sup,
+	CJK_Ext_A,
+	CJK_Ext_B,
+	CJK_Ext_C,
+	CJK_Ext_D,
+	CJK_Ext_E,
+	CJK_Ext_F,
+	CJK_Ext_G,
+	CJK_Ext_H,
+	CJK_Ext_I,
+	CJK_Ext_J,
+	CJK_Radicals_Sup,
+	CJK_Strokes,
+	CJK_Symbols,
+	Compat_Jamo,
+	Control_Pictures,
+	Coptic,
+	Coptic_Epact_Numbers,
+	Counting_Rod,
+	Cuneiform,
+	Cuneiform_Numbers,
+	Currency_Symbols,
+	Cypriot_Syllabary,
+	Cypro_Minoan,
+	Cyrillic,
+	Cyrillic_Ext_A,
+	Cyrillic_Ext_B,
+	Cyrillic_Ext_C,
+	Cyrillic_Ext_D,
+	Cyrillic_Sup,
+	Deseret,
+	Devanagari,
+	Devanagari_Ext,
+	Devanagari_Ext_A,
+	Diacriticals,
+	Diacriticals_Ext,
+	Diacriticals_For_Symbols,
+	Diacriticals_Sup,
+	Dingbats,
+	Dives_Akuru,
+	Dogra,
+	Domino,
+	Duployan,
+	Early_Dynastic_Cuneiform,
+	Egyptian_Hieroglyph_Format_Controls,
+	Egyptian_Hieroglyphs,
+	Egyptian_Hieroglyphs_Ext_A,
+	Elbasan,
+	Elymaic,
+	Emoticons,
+	Enclosed_Alphanum,
+	Enclosed_Alphanum_Sup,
+	Enclosed_CJK,
+	Enclosed_Ideographic_Sup,
+	Ethiopic,
+	Ethiopic_Ext,
+	Ethiopic_Ext_A,
+	Ethiopic_Ext_B,
+	Ethiopic_Sup,
+	Garay,
+	Geometric_Shapes,
+	Geometric_Shapes_Ext,
+	Georgian,
+	Georgian_Ext,
+	Georgian_Sup,
+	Glagolitic,
+	Glagolitic_Sup,
+	Gothic,
+	Grantha,
+	Greek,
+	Greek_Ext,
+	Gujarati,
+	Gunjala_Gondi,
+	Gurmukhi,
+	Gurung_Khema,
+	Half_And_Full_Forms,
+	Half_Marks,
+	Hangul,
+	Hanifi_Rohingya,
+	Hanunoo,
+	Hatran,
+	Hebrew,
+	High_PU_Surrogates,
+	High_Surrogates,
+	Hiragana,
+	IDC,
+	Ideographic_Symbols,
+	Imperial_Aramaic,
+	Indic_Number_Forms,
+	Indic_Siyaq_Numbers,
+	Inscriptional_Pahlavi,
+	Inscriptional_Parthian,
+	IPA_Ext,
+	Jamo,
+	Jamo_Ext_A,
+	Jamo_Ext_B,
+	Javanese,
+	Kaithi,
+	Kaktovik_Numerals,
+	Kana_Ext_A,
+	Kana_Ext_B,
+	Kana_Sup,
+	Kanbun,
+	Kangxi,
+	Kannada,
+	Katakana,
+	Katakana_Ext,
+	Kawi,
+	Kayah_Li,
+	Kharoshthi,
+	Khitan_Small_Script,
+	Khmer,
+	Khmer_Symbols,
+	Khojki,
+	Khudawadi,
+	Kirat_Rai,
+	Lao,
+	Latin_1_Sup,
+	Latin_Ext_A,
+	Latin_Ext_Additional,
+	Latin_Ext_B,
+	Latin_Ext_C,
+	Latin_Ext_D,
+	Latin_Ext_E,
+	Latin_Ext_F,
+	Latin_Ext_G,
+	Lepcha,
+	Letterlike_Symbols,
+	Limbu,
+	Linear_A,
+	Linear_B_Ideograms,
+	Linear_B_Syllabary,
+	Lisu,
+	Lisu_Sup,
+	Low_Surrogates,
+	Lycian,
+	Lydian,
+	Mahajani,
+	Mahjong,
+	Makasar,
+	Malayalam,
+	Mandaic,
+	Manichaean,
+	Marchen,
+	Masaram_Gondi,
+	Math_Alphanum,
+	Math_Operators,
+	Mayan_Numerals,
+	Medefaidrin,
+	Meetei_Mayek,
+	Meetei_Mayek_Ext,
+	Mende_Kikakui,
+	Meroitic_Cursive,
+	Meroitic_Hieroglyphs,
+	Miao,
+	Misc_Arrows,
+	Misc_Math_Symbols_A,
+	Misc_Math_Symbols_B,
+	Misc_Pictographs,
+	Misc_Symbols,
+	Misc_Symbols_Sup,
+	Misc_Technical,
+	Modi,
+	Modifier_Letters,
+	Modifier_Tone_Letters,
+	Mongolian,
+	Mongolian_Sup,
+	Mro,
+	Multani,
+	Music,
+	Myanmar,
+	Myanmar_Ext_A,
+	Myanmar_Ext_B,
+	Myanmar_Ext_C,
+	Nabataean,
+	Nag_Mundari,
+	Nandinagari,
+	NB,
+	New_Tai_Lue,
+	Newa,
+	NKo,
+	Number_Forms,
+	Nushu,
+	Nyiakeng_Puachue_Hmong,
+	OCR,
+	Ogham,
+	Ol_Chiki,
+	Ol_Onal,
+	Old_Hungarian,
+	Old_Italic,
+	Old_North_Arabian,
+	Old_Permic,
+	Old_Persian,
+	Old_Sogdian,
+	Old_South_Arabian,
+	Old_Turkic,
+	Old_Uyghur,
+	Oriya,
+	Ornamental_Dingbats,
+	Osage,
+	Osmanya,
+	Ottoman_Siyaq_Numbers,
+	Pahawh_Hmong,
+	Palmyrene,
+	Pau_Cin_Hau,
+	Phags_Pa,
+	Phaistos,
+	Phoenician,
+	Phonetic_Ext,
+	Phonetic_Ext_Sup,
+	Playing_Cards,
+	Psalter_Pahlavi,
+	PUA,
+	Punctuation,
+	Rejang,
+	Rumi,
+	Runic,
+	Samaritan,
+	Saurashtra,
+	Sharada,
+	Sharada_Sup,
+	Shavian,
+	Shorthand_Format_Controls,
+	Siddham,
+	Sidetic,
+	Sinhala,
+	Sinhala_Archaic_Numbers,
+	Small_Forms,
+	Small_Kana_Ext,
+	Sogdian,
+	Sora_Sompeng,
+	Soyombo,
+	Specials,
+	Sundanese,
+	Sundanese_Sup,
+	Sunuwar,
+	Sup_Arrows_A,
+	Sup_Arrows_B,
+	Sup_Arrows_C,
+	Sup_Math_Operators,
+	Sup_PUA_A,
+	Sup_PUA_B,
+	Sup_Punctuation,
+	Sup_Symbols_And_Pictographs,
+	Super_And_Sub,
+	Sutton_SignWriting,
+	Syloti_Nagri,
+	Symbols_And_Pictographs_Ext_A,
+	Symbols_For_Legacy_Computing,
+	Symbols_For_Legacy_Computing_Sup,
+	Syriac,
+	Syriac_Sup,
+	Tagalog,
+	Tagbanwa,
+	Tags,
+	Tai_Le,
+	Tai_Tham,
+	Tai_Viet,
+	Tai_Xuan_Jing,
+	Tai_Yo,
+	Takri,
+	Tamil,
+	Tamil_Sup,
+	Tangsa,
+	Tangut,
+	Tangut_Components,
+	Tangut_Components_Sup,
+	Tangut_Sup,
+	Telugu,
+	Thaana,
+	Thai,
+	Tibetan,
+	Tifinagh,
+	Tirhuta,
+	Todhri,
+	Tolong_Siki,
+	Toto,
+	Transport_And_Map,
+	Tulu_Tigalari,
+	UCAS,
+	UCAS_Ext,
+	UCAS_Ext_A,
+	Ugaritic,
+	Vai,
+	Vedic_Ext,
+	Vertical_Forms,
+	Vithkuqi,
+	VS,
+	VS_Sup,
+	Wancho,
+	Warang_Citi,
+	Yezidi,
+	Yi_Radicals,
+	Yi_Syllables,
+	Yijing,
+	Zanabazar_Square,
+	Znamenny_Music,
+}
+
+Combining_Class :: distinct byte
+
+Paired_Brack_Type :: enum {
+	Nil,
+	Open,
+	Close,
+	None,
+}
+
+Bidi_Class :: enum {
+	Nil, // 
+	L,   // Left-to-Right  LRM
+	R,   // Right-to-Left  RLM
+	AL,  // Right-to-Left Arabic ALM 
+	EN,  // European Number
+	ES,  // European Number Separator
+	ET,  // European Number Terminator
+	AN,  // Arabic Number
+	CS,  // Common Number Separator
+	NSM, // Nonspacing Mark
+	BN,  // Boundary Neutral
+	B,   // Paragraph Separator
+	S,   // Segment Separator
+	WS,  // Whitespace
+	ON,  // Other Neutrals
+	LRE, // Left-to-Right Embedding  LRE    
+	LRO, // Left-to-Right Override   LRO
+	RLE, // Right-to-Left Embedding  RLE
+	RLO, // Right-to-Left Override   RLO
+	PDF, // Pop Directional Format   PDF
+	LRI, // Left-to-Right Isolate    LRI
+	RLI, // Right-to-Left Isolate    RLI
+	FSI, // First Strong Isolate     FSI
+	PDI, // Pop Directional Isolate  PDI
+}
+
+
+Bidi :: struct {
+	bc: Bidi_Class,
+	bmg: Maybe(rune), // mirrored glyph
+	m: bool, // Bidi mirrored
+	c: bool, // Bidi control property
+	bpt : Paired_Brack_Type, // bidi paired bracket type 
+	bpb : rune, // bidi paired bracket properties 
+}
+
+
+Decomposition_Type :: enum {
+	Nil = 0,
+	can,
+	com,
+	enc,
+	fin,
+	font,
+	fra,
+	init,
+	iso,
+	med,
+	nar,
+	nb,
+	sml,
+	sqr,
+	sub,
+	sup,
+	vert,
+	wid,
+	none,
+}
+
+Trinary_Bool :: enum {
+	Maybe = -1,
+	False = 0,
+	True = 1,
+}
+
+Decomposition_Mapping :: distinct [dynamic]rune 
+
+Decomposition :: struct {
+	dt: Decomposition_Type, // Decomposition type
+	dm: Decomposition_Mapping, // Decomposition Mapping
+	ce: bool, // Composition Exclusion
+	comp_ex: bool, // Full Composition Exclusion
+	nfc_quick_check: Trinary_Bool,
+	nfd_quick_check: bool,
+	nfkc_quick_check: Trinary_Bool,
+	nfkd_quick_check: bool,
+}
+
+Numeric_Type :: enum {
+	None = 0, // None
+	Decimal, // De
+	Digit, // Di
+	Numeric, // Nu
+}
+
+/*
+Note: Value is NaN when numerator and denominator are 0
+*/
+Numberic_Value :: struct {
+	numerator: int,
+	denominator: int,
+}
+
+Char :: struct {
+	cp: rune,
+	name: string, 
+	gc: General_Category,
+	ccc: Combining_Class,
+	bc: Bidi_Class,
+	dt: Decomposition_Type,
+	dm: Decomposition_Mapping,
+	nt: Numeric_Type,
+	nv: Numberic_Value,
+	bm: bool,
+	name1: string,
+	sum: string, // Simple uppercase mapping
+	slm: string, // Simple lowercase mapping
+	stm: string, // Simple titlecase_mapping
+}
+
+Char_Range :: struct {
+	first_cp: rune,
+	last_cp: rune,
+	name: string, 
+	gc: General_Category,
+	ccc: Combining_Class,
+	bc: Bidi_Class,
+	dt: Decomposition_Type,
+	dm: Decomposition_Mapping,
+	nt: Numeric_Type,
+	nv: Numberic_Value,
+	bm: bool,
+	name1: string,
+	sum: string, // Simple uppercase mapping
+	slm: string, // Simple lowercase mapping
+	stm: string, // Simple titlecase_mapping
+}
+
+Chars :: union {
+	Char,
+	Char_Range,
+}
+
+Unicode_Data :: distinct [dynamic]Chars
+
+
+PropList_Property :: enum {
+	White_Space,
+	Bidi_Control,
+	Join_Control,
+	Dash,	
+	Hyphen, 
+	Quotation_Mark,
+	Terminal_Punctuation,
+	Other_Math,
+	Hex_Digit,	
+	ASCII_Hex_Digit,
+	Other_Alphabetic,
+	Ideographic,
+	Diacritic,
+	Extender,
+	Other_Lowercase,
+	Other_Uppercase,
+	Noncharacter_Code_Point,
+	Other_Grapheme_Extend,
+	IDS_Binary_Operator,
+	IDS_Trinary_Operator,
+	IDS_Unary_Operator,
+	Radical,
+	Unified_Ideograph,
+	Other_Default_Ignorable_Code_Point,
+	Deprecated,
+	Soft_Dotted,
+	Logical_Order_Exception,
+	Other_ID_Start,
+	Other_ID_Continue,
+	ID_Compat_Math_Continue,
+	ID_Compat_Math_Start,
+	Sentence_Terminal,
+	Variation_Selector,
+	Pattern_White_Space,
+	Pattern_Syntax,
+	Prepended_Concatenation_Mark,
+	Regional_Indicator,
+	Modifier_Combining_Mark,
+}
+
+UCD_Error :: enum {
+	XML_LOAD_ERROR,
+	XML_Not_UCD,
+	Nil_XML_Document,
+	Element_Not_Repertoire,
+	Extra_Fields,
+	Unknown_Property,
+
+	NO_REPERTOIRE,
+	UNEXPECTED_STRING,
+	Invalid_Hex_Number,
+	Invalid_General_Category,
+	UnicodeData_6_Too_Long,
+	UnicodeData_6_Invalid,
+	UnicodeData_7_Too_Long,
+	UnicodeData_7_Invalid,
+}
+
+
+Error :: union #shared_nil {
+	UCD_Error,
+	os.Error,
+}
+
+// A first/last pair of 16-bit values; element type of Dynamic_Range.ranges_16.
+// NOTE(review): both bounds are presumably inclusive — confirm against the
+// code that consumes the generated tables.
+Range_u16 :: struct {
+	first: u16,
+	last: u16,
+}
+
+// A first/last pair of 32-bit values; element type of Dynamic_Range.ranges_32.
+Range_i32 :: struct {
+	first: i32,
+	last: i32,
+}
+
+// A first/last pair of runes; the input type of append_to_dynamic_range.
+// `first == last` denotes a single code point.
+Range_Rune :: struct {
+	first: rune,
+	last: rune,
+}
+
+// A set of code points split into four growable arrays by shape (single code
+// point vs. first/last pair) and by width (fits in 16 bits vs. needs 32).
+// Filled by append_to_dynamic_range and released by destroy_dynamic_range.
+Dynamic_Range :: struct {
+	// Single code points <= 0xFFFF.
+	single_16 : [dynamic]u16,
+	// Pairs whose first and last are both <= 0xFFFF.
+	ranges_16 : [dynamic]Range_u16,
+	// Single code points above 0xFFFF.
+	single_32 : [dynamic]i32,
+	// All remaining pairs (at least one bound above 0xFFFF).
+	ranges_32 : [dynamic]Range_i32,
+}
+
+/*
+Appends `range` to the array of `dr` that matches its shape and width:
+
+- first == last, value <= 0xFFFF      -> single_16
+- first == last, otherwise            -> single_32
+- first and last both <= 0xFFFF       -> ranges_16
+- anything else                       -> ranges_32
+
+A target array whose length is 0 is first created with `allocator` (initial
+capacity 512 for the single arrays, 128 for the range arrays).
+
+Inputs:
+- dr: The Dynamic_Range to append to.
+- range: The code point or code point pair to add.
+- allocator: Allocator used when a target array has to be created.
+*/
+append_to_dynamic_range :: proc(
+	dr: ^Dynamic_Range,
+	range: Range_Rune,
+	allocator := context.allocator,
+) {
+	is_single := range.first == range.last
+	fits_16 := range.first <= 0xFFFF && range.last <= 0xFFFF
+
+	switch {
+	case is_single && fits_16:
+		if len(dr.single_16) == 0 {
+			dr.single_16 = make([dynamic]u16, 0, 512, allocator)
+		}
+		append(&dr.single_16, u16(range.first))
+
+	case is_single:
+		if len(dr.single_32) == 0 {
+			dr.single_32 = make([dynamic]i32, 0, 512, allocator)
+		}
+		append(&dr.single_32, i32(range.first))
+
+	case fits_16:
+		if len(dr.ranges_16) == 0 {
+			dr.ranges_16 = make([dynamic]Range_u16, 0, 128, allocator)
+		}
+		append(&dr.ranges_16, Range_u16{first = u16(range.first), last = u16(range.last)})
+
+	case:
+		if len(dr.ranges_32) == 0 {
+			dr.ranges_32 = make([dynamic]Range_i32, 0, 128, allocator)
+		}
+		append(&dr.ranges_32, Range_i32{first = i32(range.first), last = i32(range.last)})
+	}
+}
+
+// Releases the four dynamic arrays held by `dr`.
+destroy_dynamic_range :: proc(dr: Dynamic_Range) {
+	delete(dr.single_16)
+	delete(dr.ranges_16)
+	delete(dr.single_32)
+	delete(dr.ranges_32)
+}
+
+// Releases the Dynamic_Range stored for every General_Category in `gcr`.
+destroy_general_category_ranges :: proc(gcr: [General_Category]Dynamic_Range) {
+	for category in General_Category {
+		destroy_dynamic_range(gcr[category])
+	}
+}
--- a/core/unicode/tools/ucd/ucd.odin
+++ b/core/unicode/tools/ucd/ucd.odin
@@ -0,0 +1,307 @@
+package ucd
+
+import "core:strings"
+import "core:os"
+
+/*
+Parses a UnicodeData.txt-style file into a `Unicode_Data` list.
+
+Every non-empty line yields a `Char`, except that a `<..., First>` line
+followed by a `<..., Last>` line yields one `Char_Range`. Only the code point,
+the name, the General_Category and the numeric type derived from fields 6-8
+are stored; all other fields are skipped.
+
+Inputs:
+- filename: Path of the file to read.
+- allocator: Allocator for the returned list and for the cloned names.
+
+Returns:
+- unicode_data: The parsed entries. Release with `destroy_unicode_data`.
+- err: An `os.Error` when the file cannot be read, or
+  `UCD_Error.Extra_Fields` when a line has more than 15 fields. Entries parsed
+  before the failing line are still returned and remain owned by the caller.
+*/
+load_unicode_data :: proc(
+	filename: string,
+	allocator := context.allocator,
+) -> (unicode_data : Unicode_Data, err: Error) {
+
+	data, os_error := os.read_entire_file(filename, context.temp_allocator)
+	if os_error != nil {
+		err = os_error
+		return 
+	}
+	// Release only the buffer read here. A `free_all` on the temp allocator
+	// would also invalidate temporary data still held by the caller.
+	defer delete(data, context.temp_allocator)
+
+	// Keep the list on the same allocator as the names cloned below.
+	unicode_data = make(Unicode_Data, allocator)
+
+	line_iter := Line_Iterator{data = data }
+	first_cp: rune
+
+	line_loop: for line in line_iterator(&line_iter) {
+		// Skip empty lines
+		if len(line) == 0 do continue
+
+		field_iter := Field_Iterator{line = line}
+		is_range := false
+		cp: rune
+		name: string
+		gc: General_Category
+
+		has_6 := false // field 6 is non-empty
+		has_7 := false // field 7 is non-empty
+		nt := Numeric_Type.None
+
+		for field, field_num in field_iterator(&field_iter) {
+			switch field_num {
+			case 0: // Code point
+				cp = 0
+
+				// Upper-case hexadecimal; parsing stops at the first other byte.
+				for c in field {
+					is_dec := c >= '0' && c <= '9'
+					is_hex := c >= 'A' && c <= 'F'
+					if !is_dec && !is_hex do break
+					cp *= 16
+					if is_dec {
+						cp += cast(rune)(c - '0')
+					} else {
+						cp += cast(rune)(c - 'A' + 10)
+					}
+				}
+
+			case 1: // Name
+				is_special := len(field) > 9 && field[0] == '<'
+
+				// Remember the start of a range; the matching "Last" line
+				// produces the entry.
+				if is_special && strings.ends_with(transmute(string) field, ", First>") {
+					first_cp = cp
+					continue line_loop
+				}
+				
+				if is_special && strings.ends_with(transmute(string) field, ", Last>") {
+					// Drop the leading '<' and the trailing ", Last>".
+					name = strings.clone_from_bytes(field[1:len(field)-7], allocator)
+					is_range = true
+				} else {
+					name = strings.clone_from_bytes(field[:], allocator)
+				}
+
+			case 2: // General_Category
+				// NOTE: This is currently ignoring a possible error; it should probably be fixed
+				gc, _ = string_to_general_category(transmute(string)field)
+
+			case 3: // Canonical_Combining_Class
+			case 4: // Bidi Class
+			case 5: // Decomposition_Type and Decomposition_Mapping
+			// Numeric_Type and Numeric_Value
+			case 6:
+				has_6 = len(field) > 0
+
+			case 7:  
+				has_7 = len(field) > 0
+
+			case 8:
+				has_8 := len(field) > 0
+				switch {
+				case has_6 && has_7 && has_8:
+					nt = .Decimal 
+
+				case !has_6 && has_7 && has_8:
+					nt = .Digit
+
+				case !has_6 && !has_7 && has_8:
+					nt = .Numeric
+
+				case:
+					nt = .None
+				}
+
+			case 9: // Bidi mirrored
+			case 10: // Unicode 1 Name (Obsolete as of 6.2.0)
+			case 11: // should be null
+			case 12:
+			case 13:
+			case 14:
+			case: 
+				// Malformed input: report it instead of trapping. The name
+				// cloned for this line has not been stored yet, so free it.
+				delete(name, allocator)
+				err = UCD_Error.Extra_Fields
+				return
+			}
+		}
+
+		if is_range {
+			cr : Char_Range
+			cr.gc = gc
+			cr.first_cp = first_cp
+			cr.last_cp = cp
+			cr.name = name
+			cr.nt = nt
+			append(&unicode_data, cr)
+		} else {
+			c : Char
+			c.gc = gc
+			c.cp = cp
+			c.name = name
+			c.nt = nt
+			append(&unicode_data, c)
+		}
+	}
+	return
+}
+
+/*
+Frees a `Unicode_Data` list together with the name stored in every entry.
+
+Inputs:
+- unicode_data: The list to destroy.
+- allocator: Allocator that owns the entry names. Pass the allocator that was
+  given to `load_unicode_data`; the default matches that procedure's default.
+*/
+destroy_unicode_data :: proc(unicode_data: Unicode_Data, allocator := context.allocator){
+	for point in unicode_data {
+		switch p in point {
+		case Char:
+			delete(p.name, allocator)
+		case Char_Range:
+			delete(p.name, allocator)
+		}
+	}
+	delete(unicode_data)
+}
+
+
+/*
+Groups the entries of a `Unicode_Data` list into per-General_Category ranges.
+
+Consecutive `Char` entries with the same category and adjacent code points are
+merged into one range. A `Char_Range` entry is always emitted on its own.
+
+Inputs:
+- ud: The list to scan, in the order it was loaded.
+- allocator: Allocator passed on to `append_to_dynamic_range`.
+
+Returns:
+- lst: One `Dynamic_Range` per General_Category.
+*/
+gc_ranges :: proc(ud: ^Unicode_Data, allocator := context.allocator) -> (lst: [General_Category]Dynamic_Range) {
+	// `first == -1` marks "no run is currently open".
+	run := Range_Rune{first = -1, last = -1}
+	run_gc: General_Category
+
+	for entry in ud {
+		switch e in entry {
+		case Char:
+			// Close the open run when this code point cannot extend it.
+			if run.first != -1 && (e.cp != run.last + 1 || e.gc != run_gc) {
+				append_to_dynamic_range(&lst[run_gc], run, allocator)
+				run = Range_Rune{first = -1, last = -1}
+			}
+
+			// Unsigned comparison: the -1 sentinel is the largest u32, so a
+			// fresh run always takes `e.cp` as its first code point.
+			if transmute(u32)e.cp < transmute(u32)run.first {
+				run.first = e.cp
+			}
+			run_gc = e.gc
+			run.last = e.cp
+
+		case Char_Range:
+			if run.first != -1 {
+				append_to_dynamic_range(&lst[run_gc], run, allocator)
+			}
+
+			append_to_dynamic_range(&lst[e.gc], Range_Rune{first = e.first_cp, last = e.last_cp}, allocator)
+			run = Range_Rune{first = -1, last = -1}
+		}
+	}
+
+	// Flush a run that is still open at the end of the list.
+	if run.first != -1 {
+		append_to_dynamic_range(&lst[run_gc], run, allocator)
+	}
+
+	return
+}
+
+
+/*
+Collects the code points whose numeric type is `.Decimal` or `.Digit` but whose
+General_Category is not `.Nd`.
+
+Adjacent qualifying `Char` entries are merged into one range. A qualifying
+`Char_Range` entry is emitted on its own.
+
+Inputs:
+- ud: The list to scan, in the order it was loaded.
+- allocator: Allocator passed on to `append_to_dynamic_range`.
+
+Returns:
+- The collected code points as a `Dynamic_Range`.
+*/
+extra_digits :: proc(ud: ^Unicode_Data, allocator := context.allocator) -> (Dynamic_Range) {
+	// `first == -1` marks "no run is currently open".
+	run := Range_Rune{first = -1, last = -1}
+	result: Dynamic_Range
+
+	for entry in ud {
+		switch e in entry {
+		case Char:
+			qualifies := e.gc != .Nd && (e.nt == .Decimal || e.nt == .Digit)
+
+			// Close the open run when this entry cannot extend it.
+			if run.first != -1 && (e.cp != run.last + 1 || !qualifies) {
+				append_to_dynamic_range(&result, run, allocator)
+				run = Range_Rune{first = -1, last = -1}
+			}
+
+			if qualifies {
+				// Unsigned comparison: the -1 sentinel is the largest u32, so
+				// a fresh run always takes `e.cp` as its first code point.
+				if transmute(u32)e.cp < transmute(u32)run.first {
+					run.first = e.cp
+				}
+				run.last = e.cp
+			}
+
+		case Char_Range:
+			qualifies := e.gc != .Nd && (e.nt == .Decimal || e.nt == .Digit)
+
+			if run.first != -1 {
+				append_to_dynamic_range(&result, run, allocator)
+			}
+
+			if qualifies {
+				append_to_dynamic_range(&result, Range_Rune{first = e.first_cp, last = e.last_cp}, allocator)
+			}
+			run = Range_Rune{first = -1, last = -1}
+		}
+	}
+
+	// Flush a run that is still open at the end of the list.
+	if run.first != -1 {
+		append_to_dynamic_range(&result, run, allocator)
+	}
+
+	return result
+}
+
+/*
+Data contained in the Unicode file PropList.txt.
+
+A `PropList` is the data contained in the Unicode Character Database (UCD)
+file PropList.txt: one `Dynamic_Range` per `PropList_Property`. It is created
+with the procedure `load_protperty_list` and destroyed with the procedure
+`destroy_protperty_list`.
+*/
+PropList ::[PropList_Property]Dynamic_Range
+
+/*
+Destroys a property table created by `load_protperty_list`, freeing the four
+lists of every `Dynamic_Range` it holds.
+
+Inputs:
+- props: The property table to destroy
+*/
+destroy_protperty_list :: proc(
+	props: [PropList_Property]Dynamic_Range,
+){
+	for property in PropList_Property {
+		entry := props[property]
+		delete(entry.ranges_16)
+		delete(entry.ranges_32)
+		delete(entry.single_16)
+		delete(entry.single_32)
+	}
+}
+
+/*
+Parses a PropList.txt-style file into one `Dynamic_Range` per property.
+
+Each non-empty line is expected to hold two fields: a code point (`0020`) or
+a code point range (`0009..000D`) in upper-case hexadecimal, followed by a
+property name understood by `string_to_proplist_property`.
+
+Inputs:
+- filename: Path of the file to read.
+- allocator: Allocator for the file buffer and for the returned lists.
+
+Returns:
+- props: The parsed ranges, indexed by property. Release with
+  `destroy_protperty_list`.
+- err: An `os.Error` when the file cannot be read,
+  `UCD_Error.Invalid_Hex_Number` for a malformed code point field,
+  `UCD_Error.Extra_Fields` for a line with more than two fields, or whatever
+  `string_to_proplist_property` reports. Ranges parsed before the failing
+  line are still returned and remain owned by the caller.
+*/
+load_protperty_list :: proc (
+	filename : string,
+	allocator := context.allocator,
+) -> (props: [PropList_Property]Dynamic_Range, err: Error) {
+
+	data, os_error := os.read_entire_file(filename, allocator)
+	if os_error != nil {
+		err = os_error
+		return 
+	}
+	// The buffer came from `allocator`, so it must be returned to it.
+	defer delete(data, allocator)
+
+	line_iter := Line_Iterator{data = data}
+	for line in line_iterator(&line_iter) {
+		if len(line) == 0 do continue
+		field_iter := Field_Iterator{ line = line}
+
+		// Becomes true once the ".." separator has been seen.
+		is_range: bool
+
+		rr : Range_Rune
+
+		prop: PropList_Property 
+		for field, i in field_iterator(&field_iter) {
+			switch i {
+			case 0: // Code point or code point range
+				for c in field {
+					is_dec := c >= '0' && c <= '9'
+					is_hex := c >= 'A' && c <= 'F'
+
+					if !is_dec && !is_hex {
+						if c == '.' {
+							if !is_range {
+								// Digits after the separator build `last`
+								// from scratch.
+								is_range = true
+								rr.last = 0
+							}
+							continue
+						} else {
+							err = UCD_Error.Invalid_Hex_Number
+							return
+						}
+					}
+
+					digit: rune
+					if is_dec {
+						digit = cast(rune)(c - '0')
+					} else {
+						digit = cast(rune)(c - 'A' + 10)
+					}
+
+					if is_range {
+						rr.last = rr.last * 16 + digit
+					} else {
+						// Before any separator the entry is a single code
+						// point, so `last` follows `first`.
+						rr.first = rr.first * 16 + digit
+						rr.last = rr.first
+					}
+				}
+
+			case 1:
+				prop, err = string_to_proplist_property(transmute(string)field)
+				if err != nil {
+					return
+				}
+
+			case:
+				err = UCD_Error.Extra_Fields
+				return
+			}
+		}
+
+		append_to_dynamic_range(&props[prop], rr, allocator)
+	}
+
+	return
+}
+
+
+