Merge pull request #6393 from Kelimion/unicode

Unicode
This commit is contained in:
Jeroen van Rijn
2026-03-09 20:00:09 +01:00
committed by GitHub
10 changed files with 46464 additions and 34 deletions

2335
core/unicode/generated.odin Normal file

File diff suppressed because it is too large Load Diff

45
core/unicode/inrange.odin Normal file
View File

@@ -0,0 +1,45 @@
package unicode
/*
Check to see if the rune `r` is in `range`.

The 16-bit tables (`ranges_16`, `single_16`) are consulted only for runes in
`0..=0xFFFF`; the 32-bit tables (`ranges_32`, `single_32`) are consulted for
every rune. `ranges_*` tables are read as flat `first, last` pairs (stride 2)
and `single_*` tables as individual code points (stride 1).

Inputs:
- r: The rune to look up.
- range: The tables to search. They are searched with `binary_search`, so they
  are presumably sorted in ascending order.

Returns:
`true` if `r` matches an entry of any of the tables, `false` otherwise.
*/
in_range :: proc(r: rune, range: Range) -> bool {
	// The lower bound matters: casting a negative (invalid) rune to u16 truncates
	// it, which would make it alias a valid code point in the 16-bit tables.
	if 0 <= r && r <= 0xFFFF {
		r16 := cast(u16) r

		// Pairs of `first, last`; `index` is the position of a candidate `first`.
		length := len(range.ranges_16)
		index := binary_search(r16, range.ranges_16, length/2, 2) if length > 0 else -1
		if index >= 0 && range.ranges_16[index] <= r16 && range.ranges_16[index+1] >= r16 {
			return true
		}

		length = len(range.single_16)
		index = binary_search(r16, range.single_16, length, 1) if length > 0 else -1
		if index >= 0 && range.single_16[index] == r16 {
			return true
		}
	}

	// Deliberately no early `false` for small runes: the 32-bit tables are
	// searched for every rune, as a 32-bit range may begin at or below 0xFFFF.
	r32 := cast(i32) r

	length := len(range.ranges_32)
	index := binary_search(r32, range.ranges_32, length/2, 2) if length > 0 else -1
	if index >= 0 && range.ranges_32[index] <= r32 && range.ranges_32[index+1] >= r32 {
		return true
	}

	length = len(range.single_32)
	index = binary_search(r32, range.single_32, length, 1) if length > 0 else -1
	if index >= 0 && range.single_32[index] == r32 {
		return true
	}
	return false
}

View File

@@ -13,7 +13,7 @@ ZERO_WIDTH_JOINER :: '\u200D'
WORD_JOINER :: '\u2060'
@(require_results)
binary_search :: proc(c: i32, table: []i32, length, stride: int, loc := #caller_location) -> int #no_bounds_check {
binary_search :: proc(c: $T, table: []T, length, stride: int, loc := #caller_location) -> int #no_bounds_check {
runtime.bounds_check_error_loc(loc, length*stride-1, len(table))
n := length
t := 0
@@ -75,16 +75,7 @@ is_lower :: proc(r: rune) -> bool #no_bounds_check {
if r <= MAX_ASCII {
return u32(r)-'a' < 26
}
c := i32(r)
p := binary_search(c, to_upper_ranges[:], len(to_upper_ranges)/3, 3)
if p >= 0 && to_upper_ranges[p] <= c && c <= to_upper_ranges[p+1] {
return true
}
p = binary_search(c, to_upper_singlets[:], len(to_upper_singlets)/2, 2)
if p >= 0 && c == to_upper_singlets[p] {
return true
}
return false
return in_range(r, ll_ranges) || in_range(r, other_lowercase_ranges)
}
@(require_results)
@@ -92,19 +83,22 @@ is_upper :: proc(r: rune) -> bool #no_bounds_check {
if r <= MAX_ASCII {
return u32(r)-'A' < 26
}
c := i32(r)
p := binary_search(c, to_lower_ranges[:], len(to_lower_ranges)/3, 3)
if p >= 0 && to_lower_ranges[p] <= c && c <= to_lower_ranges[p+1] {
return true
}
p = binary_search(c, to_lower_singlets[:], len(to_lower_singlets)/2, 2)
if p >= 0 && c == to_lower_singlets[p] {
return true
}
return false
return in_range(r, lu_ranges) || in_range(r, other_uppercase_ranges)
}
is_alpha :: is_letter
/*
Return true if the rune `r` is a letter. Being a letter means that the rune has
the Unicode general category property of L. In practice, the character will have
a general category property of Ll, Lm, Lo, Lt, or Lu.
Inputs:
- r: The rune which will be checked for having the property of being a letter.
Returns:
`true` when the rune `r` is a letter. `false` will be returned in all other cases.
*/
@(require_results)
is_letter :: proc(r: rune) -> bool #no_bounds_check {
if u32(r) <= MAX_LATIN1 {
@@ -114,16 +108,9 @@ is_letter :: proc(r: rune) -> bool #no_bounds_check {
return true
}
c := i32(r)
p := binary_search(c, alpha_ranges[:], len(alpha_ranges)/2, 2)
if p >= 0 && alpha_ranges[p] <= c && c <= alpha_ranges[p+1] {
return true
}
p = binary_search(c, alpha_singlets[:], len(alpha_singlets), 1)
if p >= 0 && c == alpha_singlets[p] {
return true
}
return false
ll_lu := in_range(r, ll_ranges) || in_range(r, lu_ranges)
return ll_lu || in_range(r, lo_ranges) || in_range(r, lt_ranges) || in_range(r, lm_ranges)
}
@(require_results)
@@ -131,11 +118,45 @@ is_title :: proc(r: rune) -> bool {
return is_upper(r) && is_lower(r)
}
/*
Reports whether the rune `r` is in the General Category Nd.

Inputs:
- r: The rune to check.

Returns:
`true` if `r` is found in `nd_ranges`, `false` otherwise.
*/
is_decimal :: proc(r: rune) -> bool {
	found := in_range(r, nd_ranges)
	return found
}
/*
This function determines if a rune is a digit. To be a digit, the
character must have a Numeric_Type of either Digit or Decimal.
Inputs:
- r: The rune to check if it is a digit.
Returns:
`true` if the rune `r` is a digit, `false` in all other cases
*/
@(require_results)
is_digit :: proc(r: rune) -> bool {
if r <= MAX_LATIN1 {
return '0' <= r && r <= '9'
return ('0' <= r && r <= '9') || r == 0x00B9 || (r >= 0x00B2 && r <= 0x0B3)
}
if in_range(r, nd_ranges) {
return true
}
if in_range(r, extra_digits_ranges) {
return true
}
return false
}
@@ -176,6 +197,15 @@ is_graphic :: proc(r: rune) -> bool {
if u32(r) <= MAX_LATIN1 {
return char_properties[u8(r)]&pg != 0
}
if is_letter(r) || is_number(r) || is_punct(r) || is_symbol(r) || in_range(r, zs_ranges) {
return true
}
if in_range(r, mc_ranges) || in_range(r, me_ranges) || in_range(r, mn_ranges) {
return true
}
return false
}
@@ -195,12 +225,25 @@ is_control :: proc(r: rune) -> bool #no_bounds_check {
return false
}
/*
Checks to see if the rune `r` is a number. This means the rune is a member
of the general category Nd, Nl, or No.
Inputs:
- r: The rune to check if it is a number.
Returns:
`true` if the rune belongs to the general category Nd, Nl, or No. `false`
is returned in all other cases.
*/
@(require_results)
is_number :: proc(r: rune) -> bool #no_bounds_check {
if u32(r) <= MAX_LATIN1 {
return char_properties[u8(r)]&pN != 0
}
return false
return in_range(r, nd_ranges) || in_range(r, nl_ranges) || in_range(r, no_ranges)
}
@(require_results)
@@ -208,7 +251,16 @@ is_punct :: proc(r: rune) -> bool #no_bounds_check {
if u32(r) <= MAX_LATIN1 {
return char_properties[u8(r)]&pP != 0
}
return false
if in_range(r, pc_ranges) || in_range(r, pd_ranges) || in_range(r, pe_ranges) {
return true
}
if in_range(r, pf_ranges) || in_range(r, pi_ranges) || in_range(r, po_ranges) {
return true
}
return in_range(r, ps_ranges)
}
@(require_results)
@@ -216,6 +268,13 @@ is_symbol :: proc(r: rune) -> bool #no_bounds_check {
if u32(r) <= MAX_LATIN1 {
return char_properties[u8(r)]&pS != 0
}
s := in_range(r, sc_ranges) || in_range(r, sm_ranges)
if s || in_range(r, so_ranges) || in_range(r, sk_ranges) {
return true
}
return false
}

View File

@@ -0,0 +1,287 @@
package ucd
import "core:fmt"
import "core:os"
import "core:strings"
import "core:mem"
import "core:io"
import "core:log"
// Table 2-3. Types of Code Points
// Table 4-4. General_Category Values page 229
// Reference https://www.unicode.org/reports/tr44/
/*
Writes the contents of a `Dynamic_Range` to an `io.Writer` as up to four
fixed-length `@(rodata)` array declarations.

The arrays are named `<name>_singles16`, `<name>_ranges16`, `<name>_singles32`
and `<name>_ranges32`. An array is only written when the corresponding dynamic
array in `range` is non-empty.

Inputs:
- writer: The `io.Writer` to be written to.
- name: Prefix for the name of every array written to `writer`.
- range: The `Dynamic_Range` to format and write to `writer`.
*/
write_range_arrays :: proc(writer: io.Writer, name: string, range: Dynamic_Range) {
	if len(range.single_16) > 0 {
		fmt.wprintln(writer, "@(rodata)")
		fmt.wprintf(writer, "%s_singles16 := [?]u16{{", name)
		for code_point, i in range.single_16 {
			// Eight entries per row; every eighth entry opens a new tab-indented row.
			if i % 8 != 0 {
				fmt.wprintf(writer, " 0x%4X,", code_point)
			} else {
				fmt.wprintf(writer, "\n\t0x%4X,", code_point)
			}
		}
		fmt.wprintln(writer, "\n}\n")
	}

	if len(range.ranges_16) > 0 {
		fmt.wprintln(writer, "@(rodata)")
		fmt.wprintfln(writer, "%s_ranges16 := [?]u16{{", name)
		// One `first, last` pair per row, flattened into a single array.
		for pair in range.ranges_16 {
			fmt.wprintfln(writer, "\t0x%4X, 0x%4X,", pair.first, pair.last)
		}
		fmt.wprintln(writer, "}\n")
	}

	if len(range.single_32) > 0 {
		fmt.wprintln(writer, "@(rodata)")
		fmt.wprintf(writer, "%s_singles32 := [?]i32{{", name)
		for code_point, i in range.single_32 {
			if i % 8 != 0 {
				fmt.wprintf(writer, " 0x%4X,", code_point)
			} else {
				fmt.wprintf(writer, "\n\t0x%4X,", code_point)
			}
		}
		fmt.wprintln(writer, "\n}\n")
	}

	if len(range.ranges_32) > 0 {
		fmt.wprintln(writer, "@(rodata)")
		fmt.wprintfln(writer, "%s_ranges32 := [?]i32{{", name)
		for pair in range.ranges_32 {
			fmt.wprintfln(writer, "\t0x%4X, 0x%4X,", pair.first, pair.last)
		}
		fmt.wprintln(writer, "}\n")
	}
}
/*
Writes the arrays for `range` followed by a `<name>_ranges := Range{...}`
declaration whose fields slice those arrays.

The table name is lowercased (ASCII only) before use. Fields whose dynamic
array is empty are left out of the `Range` literal.

Inputs:
- writer: The `io.Writer` to be written to.
- name: Either an explicit table name or a `General_Category`, which is
  formatted with `%s` to obtain the name.
- range: The `Dynamic_Range` to write.
*/
write_range :: proc(writer: io.Writer, name: union{string, General_Category}, range: Dynamic_Range) {
	buffer: [128]byte
	str: string

	// In both cases `str` ends up backed by `buffer`, so it can be edited in place.
	switch n in name {
	case General_Category:
		str = fmt.bprintf(buffer[:], "%s", n)
	case string:
		assert(len(n) <= len(buffer))
		copy(buffer[:], n)
		str = string(buffer[:len(n)])
	}

	// lowercase table names
	for i in 0..<len(str) {
		c := buffer[i]
		if 'A' <= c && c <= 'Z' {
			buffer[i] = c + ('a' - 'A')
		}
	}

	write_range_arrays(writer, str, range)

	has_single_16 := len(range.single_16) > 0
	has_ranges_16 := len(range.ranges_16) > 0
	has_single_32 := len(range.single_32) > 0
	has_ranges_32 := len(range.ranges_32) > 0

	fmt.wprintfln(writer, "%s_ranges := Range{{", str)
	if has_single_16 {
		fmt.wprintfln(writer, "\tsingle_16 = %s_singles16[:],", str)
	}
	if has_ranges_16 {
		fmt.wprintfln(writer, "\tranges_16 = %s_ranges16[:],", str)
	}
	if has_single_32 {
		fmt.wprintfln(writer, "\tsingle_32 = %s_singles32[:],", str)
	}
	if has_ranges_32 {
		fmt.wprintfln(writer, "\tranges_32 = %s_ranges32[:],", str)
	}
	fmt.wprintln(writer, "}\n")
}
// Banner comment that `main` writes into the generated source right after the package line.
GENERATED :: `/*
------ GENERATED ------ DO NOT EDIT ------ GENERATED ------ DO NOT EDIT ------ GENERATED ------
*/
`
// Provenance and license notice that `main` writes into the generated source after `GENERATED`.
// The text is emitted verbatim, so it must stay a valid Odin block comment.
MESSAGE :: `/*
This file is generated from UnicodeData.txt and PropList.txt. These files
are part of the Unicode Database (UCD) and are covered by the license
listed further down. They may be downloaded from the following locations;
https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
https://www.unicode.org/license.txt
------------------------------------------------------------------------------
UNICODE LICENSE V3
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2026 Unicode, Inc.
NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
Permission is hereby granted, free of charge, to any person obtaining a
copy of data files and any associated documentation (the "Data Files") or
software and any associated documentation (the "Software") to deal in the
Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Data Files or Software, and to permit persons to whom the
Data Files or Software are furnished to do so, provided that either (a)
this copyright and permission notice appear with all copies of the Data
Files or Software, or (b) this copyright and permission notice appear in
associated Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.
*/
`
/*
Entry point of the generator.

Loads `UnicodeData.txt` and `PropList.txt` from `tests/core/assets/UCD/` under
`ODIN_ROOT`, builds the range tables, and writes them as Odin source to
`core/unicode/generated.odin`.

All allocations go through a tracking allocator; anything still allocated when
`main` returns is reported on stderr. If either input file fails to load, the
error is logged and nothing is written, so an existing `generated.odin` is
left untouched.
*/
main :: proc() {
	track: mem.Tracking_Allocator
	mem.tracking_allocator_init(&track, context.allocator)
	defer {
		if len(track.allocation_map) > 0 {
			fmt.eprintf("=== %v allocations not freed: ===\n", len(track.allocation_map))
			for _, entry in track.allocation_map {
				fmt.eprintf("- %v bytes @ %v\n", entry.size, entry.location)
			}
		}
		mem.tracking_allocator_destroy(&track)
	}
	context.allocator = mem.tracking_allocator(&track)

	context.logger = log.create_console_logger()
	defer log.destroy_console_logger(context.logger)

	ucd_path := ODIN_ROOT + "tests/core/assets/UCD/UnicodeData.txt"
	unicode_data, ucd_err := load_unicode_data(ucd_path)
	if ucd_err != nil {
		log.errorf("Error loading Unicode data. %s", ucd_err)
		// Stop here, as the PropList error path does: carrying on would overwrite
		// generated.odin with tables built from missing or partial data.
		return
	}
	defer destroy_unicode_data(unicode_data)

	general_category_ranges := gc_ranges(&unicode_data)
	defer destroy_general_category_ranges(general_category_ranges)

	extra_digits := extra_digits(&unicode_data)
	defer destroy_dynamic_range(extra_digits)

	proplist_path := ODIN_ROOT + "tests/core/assets/UCD/PropList.txt"
	proplist, proplist_err := load_property_list(proplist_path)
	if proplist_err != nil {
		log.errorf("Error loading PropList.txt. %s", proplist_err)
		return
	}
	defer destroy_property_list(proplist)

	// The whole file is assembled in memory first and written out in one go at the end.
	sb := strings.builder_make_len_cap(0, 1024*32)
	defer strings.builder_destroy(&sb)

	writer := strings.to_writer(&sb)

	fmt.wprintfln(writer, "package unicode\n")
	fmt.wprintln(writer, GENERATED)
	fmt.wprintln(writer, MESSAGE)

	// Declaration of the `Range` type that the generated tables are stored in.
	Range_Type :: "Range :: struct {\n" +
		"\tsingle_16 : []u16,\n" +
		"\tranges_16 : []u16,\n" +
		"\tsingle_32 : []i32,\n" +
		"\tranges_32 : []i32,\n" +
		"}\n"

	fmt.wprintfln(writer, "%s", Range_Type)

	// List of the general categories to skip when generating the code for
	// core/unicode/generated.odin.
	to_exclude := [?]General_Category{
		.Cc, // Control, a C0 or C1 control code
		.Cf, // Format, a format control character
		.Cn, // Unassigned, a reserved unassigned code point or a noncharacter
		.Co, // Private_Use, a private-use character
		.Cs, // Surrogate, a surrogate code point
		// .Ll, // Lowercase_Letter, a lowercase letter
		// .Lm, // Modifier_Letter, a modifier letter
		// .Lo, // Other_Letter, other letters, including syllables and ideographs
		// .Lt, // Titlecase_Letter, a digraph encoded as a single character, with first part uppercase
		// .Lu, // Uppercase_Letter, an uppercase letter
		// .Mc, // Spacing_Mark, a spacing combining mark (positive advance width)
		// .Me, // Enclosing_Mark, an enclosing combining mark
		// .Mn, // Nonspacing_Mark, a nonspacing combining mark (zero advance width)
		//.Nd, // Decimal_Number, a decimal digit
		//.Nl, // Letter_Number, a letterlike numeric character
		//.No, // Other_Number, a numeric character of other type
		// .Pc, // Connector_Punctuation, a connecting punctuation mark, like a tie
		// .Pd, // Dash_Punctuation, a dash or hyphen punctuation mark
		// .Pe, // Close_Punctuation, a closing punctuation mark (of a pair)
		// .Pf, // Final_Punctuation, a final quotation mark
		// .Pi, // Initial_Punctuation, an initial quotation mark
		// .Po, // Other_Punctuation, a punctuation mark of other type
		// .Ps, // Open_Punctuation, an opening punctuation mark (of a pair)
		// .Sc, // Currency_Symbol, a currency sign
		// .Sk, // Modifier_Symbol, a non-letterlike modifier symbol
		// .Sm, // Math_Symbol, a symbol of mathematical use
		// .So, // Other_Symbol, a symbol of other type
		.Zl, // Line_Separator, U+2028 LINE SEPARATOR only
		.Zp, // Paragraph_Separator, U+2029 PARAGRAPH SEPARATOR only
		//.Zs, // Space_Separator, a space character (of various non-zero widths)
	}

	write_loop: for range, category in general_category_ranges {
		for excluded in to_exclude {
			if category == excluded {
				continue write_loop
			}
		}
		write_range(writer, category, range)
	}

	write_range(writer, "extra_digits", extra_digits)
	write_range(writer, "other_lowercase", proplist[.Other_Lowercase])
	write_range(writer, "other_uppercase", proplist[.Other_Uppercase])

	file_name := ODIN_ROOT + "core/unicode/generated.odin"
	if write_error := os.write_entire_file_from_string(file_name, strings.to_string(sb)); write_error != nil {
		log.errorf("Error %v writing %q", write_error, file_name)
	}
}

View File

@@ -0,0 +1,162 @@
package ucd
/*
Converts a two-letter General_Category abbreviation (e.g. "Lu") to its
`General_Category` value.

Inputs:
- str: The abbreviation to convert. The match is exact and case-sensitive.

Returns:
- gc: The matching category, or the zero value of `General_Category` if `str` is not recognised.
- err: `nil` on success, `.Invalid_General_Category` if `str` is not recognised.
*/
string_to_general_category :: proc "contextless" (str: string) -> (gc: General_Category, err: Error) {
	switch str {
	case "Cc": return .Cc, nil
	case "Cf": return .Cf, nil
	case "Cn": return .Cn, nil
	case "Co": return .Co, nil
	case "Cs": return .Cs, nil
	case "Ll": return .Ll, nil
	case "Lm": return .Lm, nil
	case "Lo": return .Lo, nil
	case "Lt": return .Lt, nil
	case "Lu": return .Lu, nil
	case "Mc": return .Mc, nil
	case "Me": return .Me, nil
	case "Mn": return .Mn, nil
	case "Nd": return .Nd, nil
	case "Nl": return .Nl, nil
	case "No": return .No, nil
	case "Pc": return .Pc, nil
	case "Pd": return .Pd, nil
	case "Pe": return .Pe, nil
	case "Pf": return .Pf, nil
	case "Pi": return .Pi, nil
	case "Po": return .Po, nil
	case "Ps": return .Ps, nil
	case "Sc": return .Sc, nil
	case "Sk": return .Sk, nil
	case "Sm": return .Sm, nil
	case "So": return .So, nil
	case "Zl": return .Zl, nil
	case "Zp": return .Zp, nil
	case "Zs": return .Zs, nil
	}
	// Unrecognised: `gc` still holds its zero value here.
	return gc, .Invalid_General_Category
}
/*
Converts a property name as spelled in PropList.txt (e.g. "White_Space") to
its `Prop_List_Property` value.

Inputs:
- str: The property name to convert. The match is exact and case-sensitive.

Returns:
The matching property, or `.Unknown` if `str` is not recognised.
*/
string_to_proplist_property :: proc(str: string) -> (prop: Prop_List_Property) {
	switch str {
	case "White_Space": prop = .White_Space
	case "Bidi_Control": prop = .Bidi_Control
	case "Join_Control": prop = .Join_Control
	case "Dash": prop = .Dash
	case "Hyphen": prop = .Hyphen
	case "Quotation_Mark": prop = .Quotation_Mark
	case "Terminal_Punctuation": prop = .Terminal_Punctuation
	case "Other_Math": prop = .Other_Math
	case "Hex_Digit": prop = .Hex_Digit
	case "ASCII_Hex_Digit": prop = .ASCII_Hex_Digit
	case "Other_Alphabetic": prop = .Other_Alphabetic
	case "Ideographic": prop = .Ideographic
	case "Diacritic": prop = .Diacritic
	case "Extender": prop = .Extender
	case "Other_Lowercase": prop = .Other_Lowercase
	case "Other_Uppercase": prop = .Other_Uppercase
	case "Noncharacter_Code_Point": prop = .Noncharacter_Code_Point
	case "Other_Grapheme_Extend": prop = .Other_Grapheme_Extend
	case "IDS_Binary_Operator": prop = .IDS_Binary_Operator
	case "IDS_Trinary_Operator": prop = .IDS_Trinary_Operator
	case "IDS_Unary_Operator": prop = .IDS_Unary_Operator
	case "Radical": prop = .Radical
	case "Unified_Ideograph": prop = .Unified_Ideograph
	case "Other_Default_Ignorable_Code_Point": prop = .Other_Default_Ignorable_Code_Point
	case "Deprecated": prop = .Deprecated
	case "Soft_Dotted": prop = .Soft_Dotted
	case "Logical_Order_Exception": prop = .Logical_Order_Exception
	case "Other_ID_Start": prop = .Other_ID_Start
	case "Other_ID_Continue": prop = .Other_ID_Continue
	case "ID_Compat_Math_Continue": prop = .ID_Compat_Math_Continue
	case "ID_Compat_Math_Start": prop = .ID_Compat_Math_Start
	case "Sentence_Terminal": prop = .Sentence_Terminal
	case "Variation_Selector": prop = .Variation_Selector
	case "Pattern_White_Space": prop = .Pattern_White_Space
	case "Pattern_Syntax": prop = .Pattern_Syntax
	case "Prepended_Concatenation_Mark": prop = .Prepended_Concatenation_Mark
	case "Regional_Indicator": prop = .Regional_Indicator
	case "Modifier_Combining_Mark": prop = .Modifier_Combining_Mark
	// `Prop_List_Property` has no `Unknown_Property` member (that name belongs
	// to `UCD_Error`); the enum's fallback member is `Unknown`.
	case: prop = .Unknown
	}
	return
}
/*
Converts a Unicode version string (e.g. "5.2", or "unassigned") to its `Age` value.

Inputs:
- str: The version string to convert. The match is exact.

Returns:
The matching `Age`, or `.Age_Unknown` if `str` is not recognised.
*/
@(deprecated="Unused?")
string_to_age :: proc "contextless" (str: string) -> (age: Age) {
	switch str {
	case "1.1":        return .Age_1_1
	case "2.0":        return .Age_2_0
	case "2.1":        return .Age_2_1
	case "3.0":        return .Age_3_0
	case "3.1":        return .Age_3_1
	case "3.2":        return .Age_3_2
	case "4.0":        return .Age_4_0
	case "4.1":        return .Age_4_1
	case "5.0":        return .Age_5_0
	case "5.1":        return .Age_5_1
	case "5.2":        return .Age_5_2
	case "6.0":        return .Age_6_0
	case "6.1":        return .Age_6_1
	case "6.2":        return .Age_6_2
	case "6.3":        return .Age_6_3
	case "7.0":        return .Age_7_0
	case "8.0":        return .Age_8_0
	case "9.0":        return .Age_9_0
	case "10.0":       return .Age_10_0
	case "11.0":       return .Age_11_0
	case "12.0":       return .Age_12_0
	case "12.1":       return .Age_12_1
	case "13.0":       return .Age_13_0
	case "14.0":       return .Age_14_0
	case "15.0":       return .Age_15_0
	case "15.1":       return .Age_15_1
	case "16.0":       return .Age_16_0
	case "17.0":       return .Age_17_0
	case "unassigned": return .Age_Unassigned
	}
	return .Age_Unknown
}
/*
Converts a one-letter paired bracket type code ("o", "c" or "n") to its
`Paired_Bracket_Type` value.

Inputs:
- str: The code to convert. The match is exact and case-sensitive.

Returns:
The matching type, or `.Unknown` if `str` is not recognised.
*/
@(deprecated="Unused?")
string_to_paired_bracket_type :: proc "contextless" (str: string) -> (pbt: Paired_Bracket_Type) {
	switch str {
	case "o": return .Open
	case "c": return .Close
	case "n": return .None
	}
	return .Unknown
}
/*
Converts a Bidi_Class abbreviation (e.g. "AL", "NSM") to its `Bidi_Class` value.

Inputs:
- str: The abbreviation to convert. The match is exact and case-sensitive.

Returns:
The matching class, or `.Unknown` if `str` is not recognised.
*/
@(deprecated="Unused?")
string_to_bidi_class :: proc "contextless" (str: string) -> (class: Bidi_Class) {
	switch str {
	case "AL":  return .AL
	case "AN":  return .AN
	case "B":   return .B
	case "BN":  return .BN
	case "CS":  return .CS
	case "EN":  return .EN
	case "ES":  return .ES
	case "ET":  return .ET
	case "FSI": return .FSI
	case "L":   return .L
	case "LRE": return .LRE
	case "LRI": return .LRI
	case "LRO": return .LRO
	case "NSM": return .NSM
	case "ON":  return .ON
	case "PDF": return .PDF
	case "PDI": return .PDI
	case "R":   return .R
	case "RLE": return .RLE
	case "RLI": return .RLI
	case "RLO": return .RLO
	case "S":   return .S
	case "WS":  return .WS
	}
	return .Unknown
}

View File

@@ -0,0 +1,694 @@
package ucd
import "core:os"
// Unicode versions, one member per release from 1.1 to 17.0, stored in a byte.
// `Age_Unknown` is the zero value; `Age_Unassigned` is a separate member listed after the versions.
Age :: enum byte {
Age_Unknown = 0,
Age_1_1,
Age_2_0,
Age_2_1,
Age_3_0,
Age_3_1,
Age_3_2,
Age_4_0,
Age_4_1,
Age_5_0,
Age_5_1,
Age_5_2,
Age_6_0,
Age_6_1,
Age_6_2,
Age_6_3,
Age_7_0,
Age_8_0,
Age_9_0,
Age_10_0,
Age_11_0,
Age_12_0,
Age_12_1,
Age_13_0,
Age_14_0,
Age_15_0,
Age_15_1,
Age_16_0,
Age_17_0,
Age_Unassigned,
}
// The Unicode General_Category values, named by their two-letter abbreviations
// and listed alphabetically. Note that `Cc` is therefore the zero value.
General_Category :: enum {
Cc, // Control, a C0 or C1 control code
Cf, // Format, a format control character
Cn, // Unassigned, a reserved unassigned code point or a noncharacter
Co, // Private_Use, a private-use character
Cs, // Surrogate, a surrogate code point
Ll, // Lowercase_Letter, a lowercase letter
Lm, // Modifier_Letter, a modifier letter
Lo, // Other_Letter, other letters, including syllables and ideographs
Lt, // Titlecase_Letter, a digraph encoded as a single character, with first part uppercase
Lu, // Uppercase_Letter, an uppercase letter
Mc, // Spacing_Mark, a spacing combining mark (positive advance width)
Me, // Enclosing_Mark, an enclosing combining mark
Mn, // Nonspacing_Mark, a nonspacing combining mark (zero advance width)
Nd, // Decimal_Number, a decimal digit
Nl, // Letter_Number, a letterlike numeric character
No, // Other_Number, a numeric character of other type
Pc, // Connector_Punctuation, a connecting punctuation mark, like a tie
Pd, // Dash_Punctuation, a dash or hyphen punctuation mark
Pe, // Close_Punctuation, a closing punctuation mark (of a pair)
Pf, // Final_Punctuation, a final quotation mark
Pi, // Initial_Punctuation, an initial quotation mark
Po, // Other_Punctuation, a punctuation mark of other type
Ps, // Open_Punctuation, an opening punctuation mark (of a pair)
Sc, // Currency_Symbol, a currency sign
Sk, // Modifier_Symbol, a non-letterlike modifier symbol
Sm, // Math_Symbol, a symbol of mathematical use
So, // Other_Symbol, a symbol of other type
Zl, // Line_Separator, U+2028 LINE SEPARATOR only
Zp, // Paragraph_Separator, U+2029 PARAGRAPH SEPARATOR only
Zs, // Space_Separator, a space character (of various non-zero widths)
}
// Identifiers for Unicode blocks. `Nil` is the zero value.
// NOTE(review): the member names look like the short block aliases used by the UCD — confirm against the data source.
Block :: enum {
Nil = 0,
Adlam,
Aegean_Numbers,
Ahom,
Alchemical,
Alphabetic_PF,
Anatolian_Hieroglyphs,
Ancient_Greek_Music,
Ancient_Greek_Numbers,
Ancient_Symbols,
Arabic,
Arabic_Ext_A,
Arabic_Ext_B,
Arabic_Ext_C,
Arabic_Math,
Arabic_PF_A,
Arabic_PF_B,
Arabic_Sup,
Armenian,
Arrows,
ASCII,
Avestan,
Balinese,
Bamum,
Bamum_Sup,
Bassa_Vah,
Batak,
Bengali,
Beria_Erfe,
Bhaiksuki,
Block_Elements,
Bopomofo,
Bopomofo_Ext,
Box_Drawing,
Brahmi,
Braille,
Buginese,
Buhid,
Byzantine_Music,
Carian,
Caucasian_Albanian,
Chakma,
Cham,
Cherokee,
Cherokee_Sup,
Chess_Symbols,
Chorasmian,
CJK,
CJK_Compat,
CJK_Compat_Forms,
CJK_Compat_Ideographs,
CJK_Compat_Ideographs_Sup,
CJK_Ext_A,
CJK_Ext_B,
CJK_Ext_C,
CJK_Ext_D,
CJK_Ext_E,
CJK_Ext_F,
CJK_Ext_G,
CJK_Ext_H,
CJK_Ext_I,
CJK_Ext_J,
CJK_Radicals_Sup,
CJK_Strokes,
CJK_Symbols,
Compat_Jamo,
Control_Pictures,
Coptic,
Coptic_Epact_Numbers,
Counting_Rod,
Cuneiform,
Cuneiform_Numbers,
Currency_Symbols,
Cypriot_Syllabary,
Cypro_Minoan,
Cyrillic,
Cyrillic_Ext_A,
Cyrillic_Ext_B,
Cyrillic_Ext_C,
Cyrillic_Ext_D,
Cyrillic_Sup,
Deseret,
Devanagari,
Devanagari_Ext,
Devanagari_Ext_A,
Diacriticals,
Diacriticals_Ext,
Diacriticals_For_Symbols,
Diacriticals_Sup,
Dingbats,
Dives_Akuru,
Dogra,
Domino,
Duployan,
Early_Dynastic_Cuneiform,
Egyptian_Hieroglyph_Format_Controls,
Egyptian_Hieroglyphs,
Egyptian_Hieroglyphs_Ext_A,
Elbasan,
Elymaic,
Emoticons,
Enclosed_Alphanum,
Enclosed_Alphanum_Sup,
Enclosed_CJK,
Enclosed_Ideographic_Sup,
Ethiopic,
Ethiopic_Ext,
Ethiopic_Ext_A,
Ethiopic_Ext_B,
Ethiopic_Sup,
Garay,
Geometric_Shapes,
Geometric_Shapes_Ext,
Georgian,
Georgian_Ext,
Georgian_Sup,
Glagolitic,
Glagolitic_Sup,
Gothic,
Grantha,
Greek,
Greek_Ext,
Gujarati,
Gunjala_Gondi,
Gurmukhi,
Gurung_Khema,
Half_And_Full_Forms,
Half_Marks,
Hangul,
Hanifi_Rohingya,
Hanunoo,
Hatran,
Hebrew,
High_PU_Surrogates,
High_Surrogates,
Hiragana,
IDC,
Ideographic_Symbols,
Imperial_Aramaic,
Indic_Number_Forms,
Indic_Siyaq_Numbers,
Inscriptional_Pahlavi,
Inscriptional_Parthian,
IPA_Ext,
Jamo,
Jamo_Ext_A,
Jamo_Ext_B,
Javanese,
Kaithi,
Kaktovik_Numerals,
Kana_Ext_A,
Kana_Ext_B,
Kana_Sup,
Kanbun,
Kangxi,
Kannada,
Katakana,
Katakana_Ext,
Kawi,
Kayah_Li,
Kharoshthi,
Khitan_Small_Script,
Khmer,
Khmer_Symbols,
Khojki,
Khudawadi,
Kirat_Rai,
Lao,
Latin_1_Sup,
Latin_Ext_A,
Latin_Ext_Additional,
Latin_Ext_B,
Latin_Ext_C,
Latin_Ext_D,
Latin_Ext_E,
Latin_Ext_F,
Latin_Ext_G,
Lepcha,
Letterlike_Symbols,
Limbu,
Linear_A,
Linear_B_Ideograms,
Linear_B_Syllabary,
Lisu,
Lisu_Sup,
Low_Surrogates,
Lycian,
Lydian,
Mahajani,
Mahjong,
Makasar,
Malayalam,
Mandaic,
Manichaean,
Marchen,
Masaram_Gondi,
Math_Alphanum,
Math_Operators,
Mayan_Numerals,
Medefaidrin,
Meetei_Mayek,
Meetei_Mayek_Ext,
Mende_Kikakui,
Meroitic_Cursive,
Meroitic_Hieroglyphs,
Miao,
Misc_Arrows,
Misc_Math_Symbols_A,
Misc_Math_Symbols_B,
Misc_Pictographs,
Misc_Symbols,
Misc_Symbols_Sup,
Misc_Technical,
Modi,
Modifier_Letters,
Modifier_Tone_Letters,
Mongolian,
Mongolian_Sup,
Mro,
Multani,
Music,
Myanmar,
Myanmar_Ext_A,
Myanmar_Ext_B,
Myanmar_Ext_C,
Nabataean,
Nag_Mundari,
Nandinagari,
NB,
New_Tai_Lue,
Newa,
NKo,
Number_Forms,
Nushu,
Nyiakeng_Puachue_Hmong,
OCR,
Ogham,
Ol_Chiki,
Ol_Onal,
Old_Hungarian,
Old_Italic,
Old_North_Arabian,
Old_Permic,
Old_Persian,
Old_Sogdian,
Old_South_Arabian,
Old_Turkic,
Old_Uyghur,
Oriya,
Ornamental_Dingbats,
Osage,
Osmanya,
Ottoman_Siyaq_Numbers,
Pahawh_Hmong,
Palmyrene,
Pau_Cin_Hau,
Phags_Pa,
Phaistos,
Phoenician,
Phonetic_Ext,
Phonetic_Ext_Sup,
Playing_Cards,
Psalter_Pahlavi,
PUA,
Punctuation,
Rejang,
Rumi,
Runic,
Samaritan,
Saurashtra,
Sharada,
Sharada_Sup,
Shavian,
Shorthand_Format_Controls,
Siddham,
Sidetic,
Sinhala,
Sinhala_Archaic_Numbers,
Small_Forms,
Small_Kana_Ext,
Sogdian,
Sora_Sompeng,
Soyombo,
Specials,
Sundanese,
Sundanese_Sup,
Sunuwar,
Sup_Arrows_A,
Sup_Arrows_B,
Sup_Arrows_C,
Sup_Math_Operators,
Sup_PUA_A,
Sup_PUA_B,
Sup_Punctuation,
Sup_Symbols_And_Pictographs,
Super_And_Sub,
Sutton_SignWriting,
Syloti_Nagri,
Symbols_And_Pictographs_Ext_A,
Symbols_For_Legacy_Computing,
Symbols_For_Legacy_Computing_Sup,
Syriac,
Syriac_Sup,
Tagalog,
Tagbanwa,
Tags,
Tai_Le,
Tai_Tham,
Tai_Viet,
Tai_Xuan_Jing,
Tai_Yo,
Takri,
Tamil,
Tamil_Sup,
Tangsa,
Tangut,
Tangut_Components,
Tangut_Components_Sup,
Tangut_Sup,
Telugu,
Thaana,
Thai,
Tibetan,
Tifinagh,
Tirhuta,
Todhri,
Tolong_Siki,
Toto,
Transport_And_Map,
Tulu_Tigalari,
UCAS,
UCAS_Ext,
UCAS_Ext_A,
Ugaritic,
Vai,
Vedic_Ext,
Vertical_Forms,
Vithkuqi,
VS,
VS_Sup,
Wancho,
Warang_Citi,
Yezidi,
Yi_Radicals,
Yi_Syllables,
Yijing,
Zanabazar_Square,
Znamenny_Music,
}
// Distinct byte type for a character's combining class (stored in the `ccc` field of `Char` and `Char_Range`).
Combining_Class :: distinct byte
// Bidi paired bracket type. `Unknown` is the zero value and is what
// `string_to_paired_bracket_type` yields for unrecognised input.
Paired_Bracket_Type :: enum {
Unknown,
Open,
Close,
None,
}
// Bidirectional class of a character, named by its abbreviation.
// `Unknown` is the zero value and is what `string_to_bidi_class` yields for unrecognised input.
Bidi_Class :: enum {
Unknown, //
L, // Left-to-Right LRM
R, // Right-to-Left RLM
AL, // Right-to-Left Arabic ALM
EN, // European Number
ES, // European Number Separator
ET, // European Number Terminator
AN, // Arabic Number
CS, // Common Number Separator
NSM, // Nonspacing Mark
BN, // Boundary Neutral
B, // Paragraph Separator
S, // Segment Separator
WS, // Whitespace
ON, // Other Neutrals
LRE, // Left-to-Right Embedding LRE
LRO, // Left-to-Right Override LRO
RLE, // Right-to-Left Embedding RLE
RLO, // Right-to-Left Override RLO
PDF, // Pop Directional Format PDF
LRI, // Left-to-Right Isolate LRI
RLI, // Right-to-Left Isolate RLI
FSI, // First Strong Isolate FSI
PDI, // Pop Directional Isolate PDI
}
// Bidirectional properties of a character, grouped in one record.
Bidi :: struct {
bc: Bidi_Class,
bmg: Maybe(rune), // mirrored glyph
m: bool, // Bidi mirrored
c: bool, // Bidi control property
pb: Paired_Bracket_Type, // bidi paired bracket type
bpb: rune, // bidi paired bracket properties
}
// Decomposition type of a character. `Nil` is the zero value.
// NOTE(review): the lowercase members look like the abbreviated `dt` values used by the UCD — confirm against the data source.
Decomposition_Type :: enum {
Nil = 0,
can,
com,
enc,
fin,
font,
fra,
init,
iso,
med,
nar,
nb,
sml,
sqr,
sub,
sup,
vert,
wid,
none,
}
// Three-valued boolean with explicit values: `Maybe` = -1, `False` = 0 (the zero value), `True` = 1.
// Used by the NFC/NFKC quick-check fields of `Decomposition`.
Trinary_Bool :: enum {
Maybe = -1,
False = 0,
True = 1,
}
// Distinct dynamic array of runes holding a character's decomposition mapping.
Decomposition_Mapping :: distinct [dynamic]rune
// Decomposition and normalization properties of a character, grouped in one record.
Decomposition :: struct {
dt: Decomposition_Type, // Decomposition type
dm: Decomposition_Mapping, // Decomposition Mapping
ce: bool, // Composition Exclusion
comp_ex: bool, // Full Composition Exclusion
nfc_quick_check: Trinary_Bool,
nfd_quick_check: bool,
nfkc_quick_check: Trinary_Bool,
nfkd_quick_check: bool,
}
// Numeric type of a character. `None` is the zero value.
Numeric_Type :: enum {
None = 0, // None
Decimal, // De
Digit, // Di
Numeric, // Nu
}
/*
Numeric value of a character, stored as a fraction.

Note: The value is NaN when both the numerator and the denominator are 0.

NOTE(review): the type name is spelled `Numberic_Value`; renaming it would also
touch the `nv` fields of `Char` and `Char_Range`.
*/
Numberic_Value :: struct {
numerator: int,
denominator: int,
}
// Record for a single code point (`cp`) and its properties.
// NOTE(review): the fields appear to mirror the columns of UnicodeData.txt — confirm against the loader.
Char :: struct {
cp: rune,
name: string,
gc: General_Category,
ccc: Combining_Class,
bc: Bidi_Class,
dt: Decomposition_Type,
dm: Decomposition_Mapping,
nt: Numeric_Type,
nv: Numberic_Value,
bm: bool,
name1: string,
sum: string, // Simple uppercase mapping
slm: string, // Simple lowercase mapping
stm: string, // Simple titlecase_mapping
}
// Record for a run of code points from `first_cp` to `last_cp` that share one set of properties.
// Apart from the two bounds, the fields are the same as those of `Char`.
Char_Range :: struct {
first_cp: rune,
last_cp: rune,
name: string,
gc: General_Category,
ccc: Combining_Class,
bc: Bidi_Class,
dt: Decomposition_Type,
dm: Decomposition_Mapping,
nt: Numeric_Type,
nv: Numberic_Value,
bm: bool,
name1: string,
sum: string, // Simple uppercase mapping
slm: string, // Simple lowercase mapping
stm: string, // Simple titlecase_mapping
}
// One entry of `Unicode_Data`: either a single code point (`Char`) or a range of them (`Char_Range`).
Chars :: union {
Char,
Char_Range,
}
// Distinct dynamic array of `Chars` entries; this is the type returned by `load_unicode_data`.
Unicode_Data :: distinct [dynamic]Chars
// Properties that `string_to_proplist_property` recognises by name.
// `Unknown` is the first member and therefore the zero value.
Prop_List_Property :: enum {
Unknown,
White_Space,
Bidi_Control,
Join_Control,
Dash,
Hyphen,
Quotation_Mark,
Terminal_Punctuation,
Other_Math,
Hex_Digit,
ASCII_Hex_Digit,
Other_Alphabetic,
Ideographic,
Diacritic,
Extender,
Other_Lowercase,
Other_Uppercase,
Noncharacter_Code_Point,
Other_Grapheme_Extend,
IDS_Binary_Operator,
IDS_Trinary_Operator,
IDS_Unary_Operator,
Radical,
Unified_Ideograph,
Other_Default_Ignorable_Code_Point,
Deprecated,
Soft_Dotted,
Logical_Order_Exception,
Other_ID_Start,
Other_ID_Continue,
ID_Compat_Math_Continue,
ID_Compat_Math_Start,
Sentence_Terminal,
Variation_Selector,
Pattern_White_Space,
Pattern_Syntax,
Prepended_Concatenation_Mark,
Regional_Indicator,
Modifier_Combining_Mark,
}
// Error codes specific to this package. They are combined with `os.Error` in the `Error` union.
// NOTE(review): the first member has value 0; check how that interacts with the `#shared_nil` `Error` union.
UCD_Error :: enum {
XML_LOAD_ERROR,
XML_Not_UCD,
Nil_XML_Document,
Element_Not_Repertoire,
Extra_Fields,
Unknown_Property,
Unknown_Bidi_Class,
NO_REPERTOIRE,
UNEXPECTED_STRING,
Invalid_Hex_Number,
Invalid_General_Category,
UnicodeData_6_Too_Long,
UnicodeData_6_Invalid,
UnicodeData_7_Too_Long,
UnicodeData_7_Invalid,
}
// Error type returned by the procedures of this package: either a package-specific
// `UCD_Error` or an `os.Error`, declared as a `#shared_nil` union.
Error :: union #shared_nil {
UCD_Error,
os.Error,
}
// A `first`..`last` pair of code points stored as u16 (element type of `Dynamic_Range.ranges_16`).
Range_u16 :: struct {
first: u16,
last: u16,
}
// A `first`..`last` pair of code points stored as i32 (element type of `Dynamic_Range.ranges_32`).
Range_i32 :: struct {
first: i32,
last: i32,
}
// A `first`..`last` pair of runes; this is the input type of `append_to_dynamic_range`.
Range_Rune :: struct {
first: rune,
last: rune,
}
// Growable set of code points, split four ways by `append_to_dynamic_range`:
// single code points versus ranges, and 16-bit versus 32-bit storage.
Dynamic_Range :: struct {
single_16: [dynamic]u16,
ranges_16: [dynamic]Range_u16,
single_32: [dynamic]i32,
ranges_32: [dynamic]Range_i32,
}
/*
Appends `range` to whichever of the four arrays of `dr` fits it.

- `first == last` and `first <= 0xFFFF`: appended to `single_16`.
- `first == last` otherwise: appended to `single_32`.
- both bounds `<= 0xFFFF`: appended to `ranges_16`.
- anything else: appended to `ranges_32`.

The target array is created with `allocator` when it is still empty.

Inputs:
- dr: The `Dynamic_Range` to append to.
- range: The inclusive range of runes to add.
- allocator: Used to create the target array (default: `context.allocator`).
*/
append_to_dynamic_range :: proc(dr: ^Dynamic_Range, range: Range_Rune, allocator := context.allocator) {
	is_single  := range.first == range.last
	first_fits := range.first <= 0xFFFF
	both_fit   := first_fits && range.last <= 0xFFFF

	switch {
	case is_single && first_fits:
		if len(dr.single_16) == 0 {
			dr.single_16 = make([dynamic]u16, 0, 512, allocator)
		}
		append(&dr.single_16, cast(u16)range.first)

	case is_single:
		if len(dr.single_32) == 0 {
			dr.single_32 = make([dynamic]i32, 0, 512, allocator)
		}
		append(&dr.single_32, cast(i32)range.first)

	case both_fit:
		if len(dr.ranges_16) == 0 {
			dr.ranges_16 = make([dynamic]Range_u16, 0, 128, allocator)
		}
		narrow := Range_u16{ cast(u16)range.first, cast(u16)range.last}
		append(&dr.ranges_16, narrow)

	case:
		if len(dr.ranges_32) == 0 {
			dr.ranges_32 = make([dynamic]Range_i32, 0, 128, allocator)
		}
		wide := Range_i32{ cast(i32)range.first, cast(i32)range.last}
		append(&dr.ranges_32, wide)
	}
}
/*
Deletes the four dynamic arrays held by a `Dynamic_Range`.

Inputs:
- dr: The `Dynamic_Range` whose arrays are deleted.
*/
destroy_dynamic_range :: proc(dr: Dynamic_Range) {
	delete(dr.single_16)
	delete(dr.ranges_16)
	delete(dr.single_32)
	delete(dr.ranges_32)
}
/*
Destroys the `Dynamic_Range` of every general category.

Inputs:
- gcr: The per-category ranges to destroy.
*/
destroy_general_category_ranges :: proc(gcr: [General_Category]Dynamic_Range) {
	for category in General_Category {
		destroy_dynamic_range(gcr[category])
	}
}

View File

@@ -0,0 +1,290 @@
package ucd
import "core:strings"
import "core:os"
import "core:strconv"
/*
Parses a UCD code point field: either a single hexadecimal code point (`0041`)
or an inclusive range written as `first..last` (`0041..005A`).

Inputs:
- str: The field text, without surrounding whitespace

Returns:
- cp1: The code point, or the first code point of the range
- cp2: Equal to `cp1` for a single code point, otherwise the last code point of the range
- err: `.Invalid_Hex_Number` if either part is not hexadecimal or lies outside 0..=0x10FFFF
*/
decode_rune :: proc(str: string) -> (cp1, cp2: rune, err: Error) {
	// Largest Unicode code point. Anything outside 0..=MAX_CODE_POINT is rejected:
	// `parse_int` accepts a sign, and a negative rune would collide with the `-1`
	// "no range open" sentinel used by `gc_ranges` and `extra_digits`.
	MAX_CODE_POINT :: 0x10FFFF

	head, _, tail := strings.partition(str, "..")

	first, first_ok := strconv.parse_int(head, 16)
	if !first_ok || first < 0 || first > MAX_CODE_POINT {
		return 0, 0, .Invalid_Hex_Number
	}
	cp1 = rune(first)

	// No `..` part: a single code point is reported as a one-element range.
	if len(tail) == 0 {
		return cp1, cp1, nil
	}

	last, last_ok := strconv.parse_int(tail, 16)
	if !last_ok || last < 0 || last > MAX_CODE_POINT {
		return 0, 0, .Invalid_Hex_Number
	}
	cp2 = rune(last)
	return
}
/*
Loads the UCD file `UnicodeData.txt`.

Each line is split on `;`. A `<..., First>` line only records its code point; the
matching `<..., Last>` line then produces one `Char_Range`. Every other line
produces one `Char`.

The returned array and the names stored in it are allocated with `allocator`.
The file contents are also read with `allocator` and released before returning;
`context.temp_allocator` is left untouched.

Inputs:
- filename: Path of the file to load
- allocator: Allocator for the result and for the temporary file buffer

Returns:
- unicode_data: The parsed entries. On error this holds the entries parsed so far.
- err: An `os.Error` if the file could not be read, otherwise a `UCD_Error` or `nil`
*/
load_unicode_data :: proc(filename: string, allocator := context.allocator) -> (unicode_data: Unicode_Data, err: Error) {
	data := os.read_entire_file(filename, allocator) or_return
	defer delete(data, allocator)

	// `append` on a zero-value dynamic array falls back to `context.allocator`,
	// so bind the requested allocator before the first append.
	unicode_data.allocator = allocator

	// Code point of the most recent `<..., First>` line, consumed by the `<..., Last>` line.
	first_cp: rune

	str := string(data)
	line_loop: for _line in strings.split_lines_iterator(&str) {
		// Ignore any comments
		line, _, _ := strings.partition(_line, "#")

		// Skip empty lines
		if len(line) == 0 { continue }

		is_range := false
		cp:    rune
		name:  string
		gc:    General_Category
		num_6: string
		num_7: string
		nt := Numeric_Type.None

		field_num := 0
		for _field in strings.split_iterator(&line, ";") {
			defer field_num += 1
			field := strings.trim_space(_field)

			switch field_num {
			case 0: // Code point
				cp, _ = decode_rune(field) or_return

			case 1: // Name
				if len(field) > 9 && field[0] == '<' && strings.ends_with(field, ", First>") {
					// Start of a range: remember it and wait for the `Last` line.
					first_cp = cp
					continue line_loop
				}

				if len(field) > 9 && field[0] == '<' && strings.ends_with(field, ", Last>") {
					// Strip the leading `<` and the trailing `, Last>` (7 bytes).
					name = strings.clone(field[1:len(field)-7], allocator)
					is_range = true
				} else {
					name = strings.clone(field[:], allocator)
				}

			case 2: // General_Category
				// NOTE: This is currently ignoring a possible error; it should probably be fixed
				gc, _ = string_to_general_category(field)

			case 3: // Canonical_Combining_Class
			case 4: // Bidi Class
			case 5: // Decomposition_Type and Decomposition_Mapping

			// Numeric_Type and Numeric_Value
			case 6:
				num_6 = field
			case 7:
				num_7 = field
			case 8:
				// The numeric type follows from which of fields 6, 7 and 8 are filled in.
				switch {
				case num_6 != "" && num_7 != "" && field != "" :
					nt = .Decimal
				case num_6 == "" && num_7 != "" && field != "" :
					nt = .Digit
				case num_6 == "" && num_7 == "" && field != "" :
					nt = .Numeric
				case:
					nt = .None
				}

			case 9:  // Bidi mirrored
			case 10: // Unicode 1 Name (Obsolete as of 6.2.0)
			case 11: // should be null
			case 12:
			case 13:
			case 14:

			case:
				// `name` was cloned for this line but is not yet owned by `unicode_data`.
				delete(name, allocator)
				err = .Extra_Fields
				return
			}
		}

		if is_range {
			append(&unicode_data, Char_Range {
				gc       = gc,
				first_cp = first_cp,
				last_cp  = cp,
				name     = name,
				nt       = nt,
			})
		} else {
			append(&unicode_data, Char{
				gc   = gc,
				cp   = cp,
				name = name,
				nt   = nt,
			})
		}
	}
	return
}
/*
Destroys a `Unicode_Data` created by `load_unicode_data`.

Inputs:
- unicode_data: The data to destroy
- allocator: The allocator that was passed to `load_unicode_data`. The entry names were
  cloned with it, and strings do not remember their allocator, so it has to be supplied
  again here. The array itself is freed with the allocator it carries.
*/
destroy_unicode_data :: proc(unicode_data: Unicode_Data, allocator := context.allocator) {
	for point in unicode_data {
		switch p in point {
		case Char:
			delete(p.name, allocator)
		case Char_Range:
			delete(p.name, allocator)
		}
	}
	delete(unicode_data)
}
/*
Groups the entries of `ud` into code point ranges per `General_Category`.

Consecutive `Char` entries whose code points are adjacent and whose category is the
same are merged into one range. Every `Char_Range` entry is emitted as its own range.

Inputs:
- ud: The Unicode data to scan, in the order it is stored
- allocator: Passed on to `append_to_dynamic_range`

Returns:
- lst: One `Dynamic_Range` per general category
*/
gc_ranges :: proc(ud: ^Unicode_Data, allocator := context.allocator) -> (lst: [General_Category]Dynamic_Range) {
	// `first == NO_RUN` marks "no run of characters is currently being collected".
	NO_RUN :: rune(-1)

	run    := Range_Rune{first = NO_RUN, last = NO_RUN}
	run_gc: General_Category

	for entry in ud {
		switch e in entry {
		case Char:
			run_is_open := run.first != NO_RUN
			extends_run := e.cp == run.last + 1 && e.gc == run_gc

			// Close the current run when this character cannot be appended to it.
			if run_is_open && !extends_run {
				append_to_dynamic_range(&lst[run_gc], run, allocator)
				run = Range_Rune{first = NO_RUN, last = NO_RUN}
			}

			// As `u32`, the sentinel is the largest value, so `min` picks `e.cp` for a fresh run.
			run.first = rune(min(u32(run.first), u32(e.cp)))
			run.last  = e.cp
			run_gc    = e.gc

		case Char_Range:
			if run.first != NO_RUN {
				append_to_dynamic_range(&lst[run_gc], run, allocator)
			}

			span := Range_Rune{first = e.first_cp, last = e.last_cp}
			append_to_dynamic_range(&lst[e.gc], span, allocator)

			run = Range_Rune{first = NO_RUN, last = NO_RUN}
		}
	}

	// Flush the run that was still open when the data ended.
	if run.first != NO_RUN {
		append_to_dynamic_range(&lst[run_gc], run, allocator)
	}
	return
}
/*
Collects the code points whose numeric type is `.Decimal` or `.Digit` but whose
general category is not `.Nd`.

Adjacent qualifying `Char` entries are merged into one range. A qualifying
`Char_Range` entry is emitted as its own range.

Inputs:
- ud: The Unicode data to scan, in the order it is stored
- allocator: Passed on to `append_to_dynamic_range`

Returns:
- The collected code points as a `Dynamic_Range`
*/
extra_digits :: proc(ud: ^Unicode_Data, allocator := context.allocator) -> (Dynamic_Range) {
	// `first == NO_RUN` marks "no run of characters is currently being collected".
	NO_RUN :: rune(-1)

	result: Dynamic_Range
	run := Range_Rune{first = NO_RUN, last = NO_RUN}

	for entry in ud {
		switch e in entry {
		case Char:
			qualifies := e.gc != .Nd && (e.nt == .Decimal || e.nt == .Digit)

			// Close the current run unless this character qualifies and directly follows it.
			if run.first != NO_RUN && !(e.cp == run.last + 1 && qualifies) {
				append_to_dynamic_range(&result, run, allocator)
				run = Range_Rune{first = NO_RUN, last = NO_RUN}
			}

			if qualifies {
				// As `u32`, the sentinel is the largest value, so `min` picks `e.cp` for a fresh run.
				run.first = rune(min(u32(run.first), u32(e.cp)))
				run.last  = e.cp
			}

		case Char_Range:
			qualifies := e.gc != .Nd && (e.nt == .Decimal || e.nt == .Digit)

			if run.first != NO_RUN {
				append_to_dynamic_range(&result, run, allocator)
			}
			if qualifies {
				span := Range_Rune{first = e.first_cp, last = e.last_cp}
				append_to_dynamic_range(&result, span, allocator)
			}
			run = Range_Rune{first = NO_RUN, last = NO_RUN}
		}
	}

	// Flush the run that was still open when the data ended.
	if run.first != NO_RUN {
		append_to_dynamic_range(&result, run, allocator)
	}
	return result
}
/*
The data contained in the Unicode Character Database (UCD) file `PropList.txt`.
A `Prop_List` holds, for each `PropList_Property`, the `Dynamic_Range` of code points listed with that property.
It is created with the procedure `load_property_list` and destroyed with the procedure `destroy_property_list`.
*/
Prop_List :: [PropList_Property]Dynamic_Range
/*
Destroys a `Prop_List` created by `load_property_list`, freeing the
`Dynamic_Range` stored for every property.

Inputs:
- props: The Prop_List to destroy
*/
destroy_property_list :: proc(props: Prop_List) {
	for property_ranges in props {
		destroy_dynamic_range(property_ranges)
	}
}
/*
Loads the UCD file `PropList.txt` into a `Prop_List`.

Everything after a `#` on a line is ignored. Each remaining line must have exactly
two `;`-separated fields: a code point or `first..last` range, and a property name.

Inputs:
- filename: Path of the file to load
- allocator: Allocator for the result and for the temporary file buffer

Returns:
- props: The loaded property list; destroy it with `destroy_property_list`.
  Empty when `err` is `.Extra_Fields`. For other parse errors it holds the
  ranges loaded before the failing line.
- err: An `os.Error` if the file could not be read, otherwise a `UCD_Error` or `nil`
*/
load_property_list :: proc(filename: string, allocator := context.allocator) -> (props: Prop_List, err: Error) {
	data := os.read_entire_file(filename, allocator) or_return
	// A slice does not remember its allocator, so it has to be named again here.
	defer delete(data, allocator)

	str := string(data)
	for _line in strings.split_lines_iterator(&str) {
		line, _, _ := strings.partition(_line, "#")
		if len(line) == 0 {
			continue
		}

		rr:   Range_Rune
		prop: PropList_Property

		i := 0
		for _field in strings.split_iterator(&line, ";") {
			defer i += 1
			field := strings.trim_space(_field)

			switch i {
			// Code point or code point range
			case 0: rr.first, rr.last = decode_rune(field) or_return
			case 1: prop = string_to_proplist_property(field) or_return
			case:
				// An empty list is returned, so release what was accumulated so far.
				destroy_property_list(props)
				return {}, .Extra_Fields
			}
		}
		append_to_dynamic_range(&props[prop], rr, allocator)
	}
	return
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,39 @@
UNICODE LICENSE V3
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2026 Unicode, Inc.
NOTICE TO USER: Carefully read the following legal agreement. BY
DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR
SOFTWARE, YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
Permission is hereby granted, free of charge, to any person obtaining a
copy of data files and any associated documentation (the "Data Files") or
software and any associated documentation (the "Software") to deal in the
Data Files or Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, and/or sell
copies of the Data Files or Software, and to permit persons to whom the
Data Files or Software are furnished to do so, provided that either (a)
this copyright and permission notice appear with all copies of the Data
Files or Software, or (b) this copyright and permission notice appear in
associated Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE
BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES,
OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA
FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.