Replace iterators; fixes line ending handling

This commit is contained in:
Jeroen van Rijn
2026-03-09 16:25:35 +01:00
parent 1d0510d27f
commit 3f330752cd
3 changed files with 47 additions and 101 deletions

View File

@@ -1,4 +1,5 @@
package main
package ucd
import "core:fmt"
import path "core:path/filepath"
import "core:os"
@@ -7,7 +8,6 @@ import "base:runtime"
import "core:mem"
import "core:io"
import "core:log"
import "ucd"
// Table 2-3. Types of Code Points
// Table 4-4. General_Category Values page 229
@@ -16,7 +16,7 @@ import "ucd"
/*
Formats a ucd.Dynamic_Range into a set of fixed length arrays and writes
Formats a Dynamic_Range into a set of fixed length arrays and writes
corresponding to an io.Writer. The value of the parameter `name` will be used as a
prefix to the array names. If a dynamic array contained in the `range` is empty,
no corresponding fixed length array will be written.
@@ -24,12 +24,12 @@ no corresponding fixed length array will be written.
Inputs:
- writer: The io.Writer to be written to.
- name: Prefix to add to any array that is written to `writer`
- range: The ucd.Dynamic_Range to format and write to writer.
- range: The Dynamic_Range to format and write to writer.
*/
write_range_arrays :: proc(
writer: io.Writer,
name: string,
range : ucd.Dynamic_Range,
range : Dynamic_Range,
) -> int {
n_written : int
if len(range.single_16) > 0 {
@@ -98,8 +98,8 @@ write_range_arrays :: proc(
write_range :: proc(
writer: io.Writer,
name: union{string,
ucd.General_Category},
range: ucd.Dynamic_Range,
General_Category},
range: Dynamic_Range,
) -> (n_written: int) {
buffer: [128]byte
str: string
@@ -110,7 +110,7 @@ write_range :: proc(
runtime.mem_copy(&buffer[0], raw_data(n), len(n))
str = transmute(string) buffer[0:len(n)]
case ucd.General_Category:
case General_Category:
str = fmt.bprintf(buffer[:], "%s", n)
}
@@ -221,28 +221,28 @@ main :: proc() {
"tests","core","assets","UCD","UnicodeData.txt"}, context.allocator)
defer delete(ucd_path)
unicode_data, ucd_err := ucd.load_unicode_data(ucd_path)
unicode_data, ucd_err := load_unicode_data(ucd_path)
if ucd_err != nil {
log.errorf("Error loading Unicode data. %s", ucd_err)
}
defer ucd.destroy_unicode_data(unicode_data)
defer destroy_unicode_data(unicode_data)
general_category_ranges := ucd.gc_ranges(&unicode_data)
defer ucd.destroy_general_category_ranges(general_category_ranges)
general_category_ranges := gc_ranges(&unicode_data)
defer destroy_general_category_ranges(general_category_ranges)
extra_digits := ucd.extra_digits(&unicode_data)
defer ucd.destroy_dynamic_range(extra_digits)
extra_digits := extra_digits(&unicode_data)
defer destroy_dynamic_range(extra_digits)
proplist_path, _ := path.join({ODIN_ROOT,
"tests","core","assets","UCD","PropList.txt"}, context.allocator)
defer delete(proplist_path)
proplist, proplist_err := ucd.load_protperty_list(proplist_path)
proplist, proplist_err := load_protperty_list(proplist_path)
if proplist_err != nil {
log.errorf("Error loading PropList.txt. %s", proplist_err)
return
}
defer ucd.destroy_protperty_list(proplist)
defer destroy_protperty_list(proplist)
@@ -267,7 +267,7 @@ main :: proc() {
//List of the general categories to skip when generating the code for
//core/unicode/generated.txt.
to_exclude := [?]ucd.General_Category{
to_exclude := [?]General_Category{
.Cc, // Control, a C0 or C1 control code
.Cf, // Format, a format control character
.Cn, // Unassigned, a reserved unassigned code point or a noncharacter

View File

@@ -1,70 +0,0 @@
package ucd
/*
State for iterating over the lines of a slice of bytes, []byte, without
allocating: `line_iterator` returns each line as a sub-slice of `data`.

Lines are separated by '\n'; a final line that is not terminated by '\n' is
still returned. Everything from a '#' up to the end of the line is treated as a
comment and is not part of the returned line.

Initialise with `Line_Iterator{data = ...}`; the other fields start at zero.
*/
Line_Iterator :: struct {
index: int, // Offset into `data` at which the next line starts
data: []byte, // Data over which to iterate
line_counter: int, // Number of lines returned so far; the last returned line number (1-based)
}
/*
Returns the next line of `it.data` and advances the iterator past it.

The returned line is a sub-slice of `it.data` (nothing is allocated) and never
contains the line terminator. Both "\n" and "\r\n" line endings are accepted.
A '#' starts a comment that runs to the end of the line; the comment is skipped
and is not part of the returned line. Blank and comment-only lines are returned
as empty slices so that line numbers stay in sync with the file.

Inputs:
- it: The iterator state; `it.index` and `it.line_counter` are updated.

Returns:
- line: The line contents without comment or line terminator.
- line_number: 1-based number of the returned line.
- more: false once all data has been consumed, in which case `line` and
  `line_number` are zero values.
*/
line_iterator :: proc(it: ^Line_Iterator) -> (line: []byte, line_number: int, more: bool) {
	more = it.index < len(it.data)
	if !more {
		return
	}

	it.line_counter += 1
	line_number = it.line_counter

	// Scan up to the end of the line or the start of a trailing comment.
	start := it.index
	for it.index < len(it.data) && it.data[it.index] != '\n' && it.data[it.index] != '#' {
		it.index += 1
	}
	line = it.data[start:it.index]

	if it.index < len(it.data) && it.data[it.index] == '#' {
		// Skip the comment; it is never handed to the caller.
		for it.index < len(it.data) && it.data[it.index] != '\n' {
			it.index += 1
		}
	} else if len(line) > 0 && line[len(line)-1] == '\r' {
		// The line ended at '\n' or at end of data: drop the '\r' of a CRLF
		// terminator so it does not leak into the last field of the line.
		line = line[:len(line)-1]
	}

	// Step over the '\n' so the next call starts on the following line.
	if it.index < len(it.data) && it.data[it.index] == '\n' {
		it.index += 1
	}
	return
}
/*
State for iterating over the ';'-separated fields of a single line with
`field_iterator`. Initialise with `Field_Iterator{line = ...}`; the other
fields start at zero.
*/
Field_Iterator :: struct {
index: int, // Offset into `line` at which the next call resumes scanning
field_counter: int, // Number of fields returned so far; index of the next field
line: []byte, // The line whose fields are iterated over
}
/*
Returns the next ';'-separated field of `it.line` and advances the iterator.

The returned field is a sub-slice of `it.line` (nothing is allocated) with
leading and trailing spaces, tabs and carriage returns removed. A field that
consists only of such whitespace is returned as an empty slice. Iteration stops
at the end of the line, at a '\n', or at a '#' (start of a comment).

Inputs:
- it: The iterator state; `it.index` and `it.field_counter` are updated.

Returns:
- field: The trimmed field contents.
- field_count: 0-based index of the returned field within the line.
- valid: false once there are no more fields, in which case `field` and
  `field_count` are zero values.
*/
field_iterator :: proc(it: ^Field_Iterator) -> (field: []byte, field_count: int, valid: bool) {
	valid = it.index < len(it.line) && it.line[it.index] != '\n' && it.line[it.index] != '#'
	if !valid {
		return
	}

	// After the first field the iterator rests on the ';' that ended the
	// previous field. Checking the field counter as well as the index makes
	// sure a line that starts with ';' (empty first field) still advances
	// instead of yielding the same empty field forever.
	if (it.index != 0 || it.field_counter != 0) && it.line[it.index] == ';' {
		it.index += 1
	}

	start := it.index
	for it.index < len(it.line) && it.line[it.index] != ';' && it.line[it.index] != '#' {
		it.index += 1
	}

	// Trim surrounding whitespace by narrowing [lo, hi) from both ends.
	lo, hi := start, it.index
	for lo < hi && (it.line[lo] == ' ' || it.line[lo] == '\t' || it.line[lo] == '\r') {
		lo += 1
	}
	for hi > lo && (it.line[hi-1] == ' ' || it.line[hi-1] == '\t' || it.line[hi-1] == '\r') {
		hi -= 1
	}
	field = it.line[lo:hi]

	field_count = it.field_counter
	it.field_counter += 1
	return
}

View File

@@ -15,14 +15,17 @@ load_unicode_data :: proc(
}
defer free_all(context.temp_allocator)
line_iter := Line_Iterator{data = data }
// line_iter := Line_Iterator{data = data }
first_cp: rune
line_loop: for line, line_num in line_iterator(&line_iter) {
// Skip empty lines
str := string(data)
line_no := 1
line_loop: for _line in strings.split_lines_iterator(&str) {
defer line_no += 1
line, _, _ := strings.partition(_line, "#")
if len(line) == 0 do continue
field_iter := Field_Iterator{line = line}
// field_iter := Field_Iterator{line = line}
is_range := false
cp: rune
name: string
@@ -33,7 +36,11 @@ load_unicode_data :: proc(
nt := Numeric_Type.None
nv : Numberic_Value
for field, field_num in field_iterator(&field_iter) {
field_num := 0
for field in strings.split_iterator(&line, ";") {
defer field_num += 1
field := strings.trim_space(field)
switch field_num {
case 0: // Code point
cp = 0
@@ -52,10 +59,10 @@ load_unicode_data :: proc(
}
if len(field) > 9 && field[0] == '<' && strings.ends_with(transmute(string) field, ", Last>") {
name = strings.clone_from_bytes(field[1:len(field)-7], allocator)
name = strings.clone(field[1:len(field)-7], allocator)
is_range = true
} else {
name = strings.clone_from_bytes(field[:], allocator)
name = strings.clone(field[:], allocator)
}
case 2: // General_Category
@@ -236,6 +243,8 @@ destroy_protperty_list :: proc(
}
}
import "core:fmt"
load_protperty_list :: proc (
filename : string,
allocator := context.allocator,
@@ -251,16 +260,26 @@ load_protperty_list :: proc (
line_iter := Line_Iterator{
data = data
}
for line in line_iterator(&line_iter) {
str := string(data)
line_no := 1
for _line in strings.split_lines_iterator(&str) {
defer line_no += 1
line, _, _ := strings.partition(_line, "#")
if len(line) == 0 do continue
field_iter := Field_Iterator{ line = line}
fmt.printfln("%d: %q", line_no, line)
is_range: bool
rr : Range_Rune
prop: PropList_Property
for field, i in field_iterator(&field_iter) {
i := 0
for field in strings.split_iterator(&line, ";") {
defer i += 1
field := strings.trim_space(field)
fmt.printfln("%d: %q", i, field)
switch i {
case 0: // Code point or code point range
for c in field {
@@ -302,7 +321,4 @@ load_protperty_list :: proc (
}
return
}
}