Replace iterators; fixes line ending handling

2026-05-25 05:09:53 +00:00 · 2026-03-09 16:25:35 +01:00
parent 1d0510d27f
commit 3f330752cd
3 changed files with 47 additions and 101 deletions
--- a/core/unicode/tools/ucd/generate_unicode.odin
+++ b/core/unicode/tools/ucd/generate_unicode.odin
@@ -1,4 +1,5 @@
-package main
+package ucd
+
 import "core:fmt"
 import path "core:path/filepath"
 import "core:os"
@@ -7,7 +8,6 @@ import "base:runtime"
 import "core:mem"
 import "core:io"
 import "core:log"
-import "ucd"

 // Table 2-3. Types of Code Points
 // Table 4-4. General_Category Values page 229
@@ -16,7 +16,7 @@ import "ucd"


 /*
-Formats a ucd.Dynamic_Range into a set of fixed length arrays and writes
+Formats a Dynamic_Range into a set of fixed length arrays and writes
 corresponding to a io.Writer. The value of the parameter `name`will be used as a
 prefix to the array names. If a dynamic array contained in the `range` is empty,
 no corresponding fixed length array will be written.
@@ -24,12 +24,12 @@ no corresponding fixed length array will be written.
 Inputs:
 - writer: The io.Writer to be written to.
 - name: Prefix to add to any array that is written to `writer`
- range: The ucd.Dynamic_Range to format and write to writer.
+- range: The Dynamic_Range to format and write to writer.
 */
 write_range_arrays :: proc(
 	writer: io.Writer,
 	name: string,
-	range : ucd.Dynamic_Range,
+	range : Dynamic_Range,
 ) -> int {
 	n_written : int
 	if len(range.single_16) > 0 { 
@@ -98,8 +98,8 @@ write_range_arrays :: proc(
 write_range :: proc(
 	writer: io.Writer,
 	name: union{string,
-	ucd.General_Category},
-	range: ucd.Dynamic_Range,
+	General_Category},
+	range: Dynamic_Range,
 ) -> (n_written: int) {
 	buffer: [128]byte
 	str: string
@@ -110,7 +110,7 @@ write_range :: proc(
 		runtime.mem_copy(&buffer[0], raw_data(n), len(n))
 		str = transmute(string) buffer[0:len(n)]

-	case ucd.General_Category:
+	case General_Category:
 		str = fmt.bprintf(buffer[:], "%s", n)
 	}

@@ -221,28 +221,28 @@ main :: proc() {
 		"tests","core","assets","UCD","UnicodeData.txt"}, context.allocator)
 	defer delete(ucd_path)

-	unicode_data, ucd_err := ucd.load_unicode_data(ucd_path)
+	unicode_data, ucd_err := load_unicode_data(ucd_path)
 	if ucd_err != nil {
 		log.errorf("Error loading Unicode data. %s", ucd_err)
 	}
-	defer ucd.destroy_unicode_data(unicode_data)
+	defer destroy_unicode_data(unicode_data)

-	general_category_ranges := ucd.gc_ranges(&unicode_data)
-	defer ucd.destroy_general_category_ranges(general_category_ranges)  
+	general_category_ranges := gc_ranges(&unicode_data)
+	defer destroy_general_category_ranges(general_category_ranges)

-	extra_digits := ucd.extra_digits(&unicode_data)
-	defer ucd.destroy_dynamic_range(extra_digits) 
+	extra_digits := extra_digits(&unicode_data)
+	defer destroy_dynamic_range(extra_digits)


 	proplist_path, _ := path.join({ODIN_ROOT,
 		"tests","core","assets","UCD","PropList.txt"}, context.allocator)
 	defer delete(proplist_path)
-	proplist, proplist_err := ucd.load_protperty_list(proplist_path)
+	proplist, proplist_err := load_protperty_list(proplist_path)
 	if proplist_err != nil {
 		log.errorf("Error loading PropList.txt. %s", proplist_err)
 		return
 	}
-	defer ucd.destroy_protperty_list(proplist) 
+	defer destroy_protperty_list(proplist)



@@ -267,7 +267,7 @@ main :: proc() {

 	//List of the general categories to skip when generating the code for
 	//core/unicode/generated.txt. 
-	to_exclude := [?]ucd.General_Category{
+	to_exclude := [?]General_Category{
 		.Cc, // Control, a C0 or C1 control code
 		.Cf, // Format, a format control character
 		.Cn, // Unassigned, a reserved unassigned code point or a noncharacter
--- a/core/unicode/tools/ucd/iterator.odin
+++ b/core/unicode/tools/ucd/iterator.odin
@@ -1,70 +0,0 @@
-package ucd
-
-/*
-An iterator that allows simple iterating over the lines of of a slice of bytes, []byte,
-without allocating. Each line must end in a new line, i.e., '\n'
-*/
-Line_Iterator :: struct {
-	index: int, // current location in data
-	data: []byte, // Data over which to iterate
-	line_counter: int, // line number storage  
-}
-
-line_iterator :: proc(it: ^Line_Iterator) -> (line: []byte, line_number: int,  more: bool) {
-	more = it.index < len(it.data)
-	if more {
-		it.line_counter += 1
-		line_number = it.line_counter
-	} else {
-		return
-	}	
-	start:= it.index
-	for it.index < len(it.data) && it.data[it.index] != '\n' && it.data[it.index] != '#' do it.index += 1
-	line = it.data[start:it.index]
-	//index = start
-
-	if it.index < len(it.data) && it.data[it.index] == '#' {
-		for it.index < len(it.data) && it.data[it.index] != '\n' do it.index += 1
-	}
-	if it.index < len(it.data) && it.data[it.index] == '\n' do it.index += 1
-	return
-}
-
-Field_Iterator :: struct {
-	index: int,
-	field_counter: int,
-	line: []byte,
-}
-
-field_iterator :: proc(it: ^Field_Iterator) -> (field: []byte, field_count: int,  valid: bool) {
-	valid = it.index < len(it.line) && it.line[it.index] != '\n' && it.line[it.index] != '#'
-	if !valid do return
-
-	if it.index < len(it.line) && it.index != 0 && it.line[it.index] == ';' do it. index += 1
-
-	start := it.index
-	for it.index < len(it.line) && it.line[it.index] != ';'  && it.line[it.index] != '#' do it.index += 1
-
-	field = it.line[start:it.index]	
-	temp := field
-
-	// Remove leading spaces
-	for b, i in temp {
-		if b != ' ' {
-			field = temp[i:]
-			break
-		}
-	}
-
-	// Remove trailing spaces
-	temp = field
-	for b, i in temp {
-		if b != ' ' {
-			field = temp[0:i+1]
-		}
-	}
-
-	field_count = it.field_counter
-	it.field_counter += 1
-	return
-}
--- a/core/unicode/tools/ucd/ucd.odin
+++ b/core/unicode/tools/ucd/ucd.odin
@@ -15,14 +15,17 @@ load_unicode_data :: proc(
 	}
 	defer free_all(context.temp_allocator)

-	line_iter := Line_Iterator{data = data }
+	// Lines come from strings.split_lines_iterator; text after '#' is dropped as a comment.
 	first_cp: rune

-	line_loop: for line, line_num in line_iterator(&line_iter) {
-		// Skip empty lines
+	str := string(data)
+	line_no := 1
+	line_loop: for _line in strings.split_lines_iterator(&str) {
+		defer line_no += 1
+		line, _, _ := strings.partition(_line, "#")
 		if len(line) == 0 do continue

-		field_iter := Field_Iterator{line = line}
+		// Fields are separated by ';' and trimmed of surrounding whitespace below.
 		is_range := false
 		cp: rune
 		name: string
@@ -33,7 +36,11 @@ load_unicode_data :: proc(
 		nt := Numeric_Type.None
 		nv : Numberic_Value

-		for field, field_num in field_iterator(&field_iter) {
+		field_num := 0
+		for field in strings.split_iterator(&line, ";") {
+			defer field_num += 1
+			field := strings.trim_space(field)
+
 			switch field_num {
 			case 0: // Code point
 				cp = 0
@@ -52,10 +59,10 @@ load_unicode_data :: proc(
 				}
 				
 				if len(field) > 9 && field[0] == '<' && strings.ends_with(transmute(string) field, ", Last>") {
-					name = strings.clone_from_bytes(field[1:len(field)-7], allocator)
+					name = strings.clone(field[1:len(field)-7], allocator)
 					is_range = true
 				} else {
-					name = strings.clone_from_bytes(field[:], allocator)
+					name = strings.clone(field[:], allocator)
 				}

 			case 2: // General_Category
@@ -251,16 +258,21 @@ load_protperty_list :: proc (
-	line_iter := Line_Iterator{
-		data = data
-	}
-	for line in line_iterator(&line_iter) {
+	// Scan the file contents line by line with the core:strings iterators;
+	// no separate iterator state is needed.
+	str := string(data)
+	for _line in strings.split_lines_iterator(&str) {
+		// Everything from the first '#' onwards is a comment and is ignored.
+		line, _, _ := strings.partition(_line, "#")
 		if len(line) == 0 do continue
-		field_iter := Field_Iterator{ line = line}
 
 		is_range: bool
 
 		rr : Range_Rune
 
 		prop: PropList_Property 
-		for field, i in field_iterator(&field_iter) {
+		i := 0
+		for field in strings.split_iterator(&line, ";") {
+			defer i += 1
+			field := strings.trim_space(field)
+
 			switch i {
 			case 0: // Code point or code point range
 				for c in field {
@@ -302,7 +314,4 @@ load_protperty_list :: proc (
 	}
 
 	return
-}
-
-
-
+}