mirror of
https://github.com/odin-lang/Odin.git
synced 2026-04-28 01:03:56 +00:00
Merge branch 'master' into xmlcomment
This commit is contained in:
@@ -24,6 +24,18 @@ ENC_TABLE := [64]byte {
|
||||
'4', '5', '6', '7', '8', '9', '+', '/',
|
||||
}
|
||||
|
||||
// Base64url encoding alphabet: maps a 6-bit value (0..63) to its output byte.
// Identical to the standard alphabet except that values 62 and 63 are
// written as '-' and '_' instead of '+' and '/'.
ENC_URL_TABLE := [64]byte {
	// 0..15
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
	// 16..31
	'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
	// 32..47
	'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
	// 48..63
	'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_',
}
|
||||
|
||||
PADDING :: '='
|
||||
|
||||
DEC_TABLE := [256]u8 {
|
||||
@@ -61,6 +73,43 @@ DEC_TABLE := [256]u8 {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
}
|
||||
|
||||
// Base64url decoding table: indexed by input byte, yields the 6-bit value.
// Bytes outside the Base64url alphabet map to 0, which is also the value of 'A',
// so this table alone cannot distinguish an invalid byte from 'A'.
DEC_URL_TABLE := [256]u8 {
	// 0x00..0x0f
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	// 0x10..0x1f
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	// 0x20..0x2f ('-' is 0x2d)
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 62,  0,  0,
	// 0x30..0x3f ('0'..'9')
	52, 53, 54, 55, 56, 57, 58, 59, 60, 61,  0,  0,  0,  0,  0,  0,
	// 0x40..0x4f ('A'..'O')
	 0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	// 0x50..0x5f ('P'..'Z', '_' is 0x5f)
	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,  0,  0,  0,  0, 63,
	// 0x60..0x6f ('a'..'o')
	 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	// 0x70..0x7f ('p'..'z')
	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,  0,  0,  0,  0,  0,
	// 0x80..0x8f
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	// 0x90..0x9f
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	// 0xa0..0xaf
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	// 0xb0..0xbf
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	// 0xc0..0xcf
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	// 0xd0..0xdf
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	// 0xe0..0xef
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	// 0xf0..0xff
	 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
}
|
||||
|
||||
|
||||
encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> (encoded: string, err: mem.Allocator_Error) #optional_allocator_error {
|
||||
out_length := encoded_len(data)
|
||||
if out_length == 0 {
|
||||
|
||||
@@ -21,6 +21,7 @@ package encoding_unicode_entity
|
||||
Jeroen van Rijn: Initial implementation.
|
||||
*/
|
||||
|
||||
import "base:runtime"
|
||||
import "core:unicode/utf8"
|
||||
import "core:unicode"
|
||||
import "core:strings"
|
||||
@@ -141,8 +142,10 @@ decode_xml :: proc(input: string, options := XML_Decode_Options{}, allocator :=
|
||||
write_string(&builder, entity)
|
||||
} else {
|
||||
if .No_Entity_Decode not_in options {
|
||||
if decoded, ok := xml_decode_entity(entity); ok {
|
||||
write_rune(&builder, decoded)
|
||||
if decoded, count, ok := xml_decode_entity(entity); ok {
|
||||
for i in 0..<count {
|
||||
write_rune(&builder, decoded[i])
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
@@ -212,17 +215,16 @@ advance :: proc(t: ^Tokenizer) -> (err: Error) {
|
||||
}
|
||||
}
|
||||
|
||||
xml_decode_entity :: proc(entity: string) -> (decoded: rune, ok: bool) {
|
||||
xml_decode_entity :: proc(entity: string) -> (decoded: [2]rune, rune_count: int, ok: bool) {
|
||||
entity := entity
|
||||
if len(entity) == 0 { return -1, false }
|
||||
if len(entity) == 0 { return }
|
||||
|
||||
switch entity[0] {
|
||||
case '#':
|
||||
if entity[0] == '#' {
|
||||
base := 10
|
||||
val := 0
|
||||
entity = entity[1:]
|
||||
|
||||
if len(entity) == 0 { return -1, false }
|
||||
if len(entity) == 0 { return }
|
||||
|
||||
if entity[0] == 'x' || entity[0] == 'X' {
|
||||
base = 16
|
||||
@@ -237,30 +239,275 @@ xml_decode_entity :: proc(entity: string) -> (decoded: rune, ok: bool) {
|
||||
val += int(r - '0')
|
||||
|
||||
case 'a'..='f':
|
||||
if base == 10 { return -1, false }
|
||||
if base == 10 { return }
|
||||
val *= base
|
||||
val += int(r - 'a' + 10)
|
||||
|
||||
case 'A'..='F':
|
||||
if base == 10 { return -1, false }
|
||||
if base == 10 { return }
|
||||
val *= base
|
||||
val += int(r - 'A' + 10)
|
||||
|
||||
case:
|
||||
return -1, false
|
||||
return
|
||||
}
|
||||
|
||||
if val > MAX_RUNE_CODEPOINT { return -1, false }
|
||||
if val > MAX_RUNE_CODEPOINT { return }
|
||||
entity = entity[1:]
|
||||
}
|
||||
return rune(val), true
|
||||
|
||||
case:
|
||||
// Named entity.
|
||||
return named_xml_entity_to_rune(entity)
|
||||
return rune(val), 1, true
|
||||
}
|
||||
// Named entity.
|
||||
return named_xml_entity_to_rune(entity)
|
||||
}
|
||||
|
||||
|
||||
// escape_html escapes special characters like '&' to become '&amp;'.
// It escapes only 5 different characters: & ' < > and "
//
// Inputs:
// - s:         the text to escape
// - allocator: used for the result only when at least one byte needs escaping
// - loc:       caller location forwarded to `make`
//
// Returns:
// - output:         the escaped text; this is `s` itself when nothing needed escaping
// - was_allocation: true when `output` is a fresh allocation
//
// NOTE: the signature carries no error value, so when the allocation fails this
// returns an empty `output` with `was_allocation == false`.
@(require_results)
escape_html :: proc(s: string, allocator := context.allocator, loc := #caller_location) -> (output: string, was_allocation: bool) {
	/*
		& -> &amp;
		' -> &#39; // &#39; is shorter than &apos; (NOTE: &apos; was not available until HTML 5)
		< -> &lt;
		> -> &gt;
		" -> &#34; // &#34; is shorter than &quot;
	*/

	b := transmute([]byte)s

	// First pass: how many bytes the replacements add on top of len(s).
	// Each count is len(replacement) - 1.
	extra_bytes_needed := 0

	for c in b {
		switch c {
		case '&':  extra_bytes_needed += 4
		case '\'': extra_bytes_needed += 4
		case '<':  extra_bytes_needed += 3
		case '>':  extra_bytes_needed += 3
		case '"':  extra_bytes_needed += 4
		}
	}

	// Nothing to escape: hand back the input without allocating.
	if extra_bytes_needed == 0 {
		return s, false
	}

	t, err := make([]byte, len(s) + extra_bytes_needed, allocator, loc)
	if err != nil {
		return
	}
	was_allocation = true

	// Second pass: copy bytes across, substituting the entity text where needed.
	w := 0
	for c in b {
		x := ""
		switch c {
		case '&':  x = "&amp;"
		case '\'': x = "&#39;"
		case '<':  x = "&lt;"
		case '>':  x = "&gt;"
		case '"':  x = "&#34;"
		}
		if x != "" {
			copy(t[w:], x)
			w += len(x)
		} else {
			t[w] = c
			w += 1
		}
	}
	output = string(t[0:w])
	return
}
|
||||
|
||||
|
||||
// unescape_html replaces HTML/XML character references in `s` (numeric ones
// such as `&#39;` and named ones) with the UTF-8 text they stand for, using
// `unescape_entity` for each reference.
//
// Text that cannot be decoded (a bare '&', an unrecognised name) is copied
// through unchanged.
//
// Inputs:
// - s:         the text to unescape
// - allocator: used for the result only when `s` contains at least one '&'
// - loc:       caller location forwarded to `make`
//
// Returns:
// - output:         the unescaped text; this is `s` itself when it contains no '&'
// - was_allocation: true when `output` is a fresh allocation
// - err:           allocation error from creating the output buffer, if any
@(require_results)
unescape_html :: proc(s: string, allocator := context.allocator, loc := #caller_location) -> (output: string, was_allocation: bool, err: runtime.Allocator_Error) {
	// Walks `s`, whose first '&' is at `amp_idx`, and returns the number of
	// output bytes produced. With `buf == nil` it only counts; otherwise it
	// also appends the output to `buf`.
	@(require_results)
	do_append :: proc(s: string, amp_idx: int, buf: ^[dynamic]byte) -> (n: int) {
		s, amp_idx := s, amp_idx

		// Literal text before the first '&'.
		n += len(s[:amp_idx])
		if buf != nil { append(buf, s[:amp_idx]) }
		s = s[amp_idx:]
		for len(s) > 0 {
			b, w, j := unescape_entity(s)
			if w == 0 {
				// Nothing was decoded. Keep the scanned text as-is rather than
				// dropping it, and always step past at least the '&' itself:
				// `unescape_entity` reports j == 0 for a trailing lone '&',
				// which would otherwise never advance and loop forever.
				j = max(j, 1)
				n += j
				if buf != nil { append(buf, s[:j]) }
			} else {
				n += w
				if buf != nil { append(buf, ..b[:w]) }
			}

			s = s[j:]

			amp_idx = strings.index_byte(s, '&')
			if amp_idx < 0 {
				// No further references: the remainder is literal text.
				n += len(s)
				if buf != nil { append(buf, s) }
				break
			}
			n += amp_idx
			if buf != nil { append(buf, s[:amp_idx]) }
			s = s[amp_idx:]
		}

		return
	}

	s := s
	amp_idx := strings.index_byte(s, '&')
	if amp_idx < 0 {
		return s, false, nil
	}

	// NOTE(bill): this does a two pass in order to minimize the allocations required
	bytes_required := do_append(s, amp_idx, nil)

	buf := make([dynamic]byte, 0, bytes_required, allocator, loc) or_return
	was_allocation = true

	_ = do_append(s, amp_idx, &buf)

	// Both passes take identical decisions, so the buffer is filled exactly.
	assert(len(buf) == cap(buf))
	output = string(buf[:])

	return
}
|
||||
|
||||
// Returns an unescaped string of an encoded XML/HTML entity.
//
// `s` is expected to begin at the '&' of a character reference; anything
// after the reference is ignored.
//
// Returns:
// - b: holds the UTF-8 encoding of the decoded reference (one or two runes)
// - w: number of valid bytes in `b`; 0 when nothing was decoded
// - j: number of bytes of `s` that were consumed; 0 when `s` is shorter than
//      two bytes or does not start with '&'
@(require_results)
unescape_entity :: proc(s: string) -> (b: [8]byte, w: int, j: int) {
	if len(s) < 2 {
		return
	}
	if s[0] != '&' {
		return
	}
	j = 1

	if s[j] == '#' { // scan numbers
		j += 1
		if len(s) <= 3 { // inputs of three bytes or fewer are not decoded
			return
		}
		c := s[j]
		hex := false
		if c == 'x' || c == 'X' {
			hex = true
			j += 1
		}

		x := rune(0)
		scan_number: for j < len(s) {
			// Saturate just above the largest code point so that a very long
			// digit run cannot overflow the accumulator and come back around
			// to a valid-looking value. Anything above 0x10ffff ends up as
			// RUNE_ERROR below.
			if x > 0x10ffff {
				x = 0x10ffff + 1
			}

			c = s[j]
			j += 1
			if hex {
				switch c {
				case '0'..='9': x = 16*x + rune(c) - '0';      continue scan_number
				case 'a'..='f': x = 16*x + rune(c) - 'a' + 10; continue scan_number
				case 'A'..='F': x = 16*x + rune(c) - 'A' + 10; continue scan_number
				}
			} else {
				switch c {
				case '0'..='9': x = 10*x + rune(c) - '0'; continue scan_number
				}
			}

			// Keep the ';' to check for cases which require it and cases which might not
			if c != ';' {
				j -= 1
			}
			break scan_number
		}


		if j <= 3 { // no replacement characters found
			return
		}

		@(static, rodata)
		windows_1252_replacement_table := [0xa0 - 0x80]rune{ // Windows-1252 -> UTF-8
			'\u20ac', '\u0081', '\u201a', '\u0192',
			'\u201e', '\u2026', '\u2020', '\u2021',
			'\u02c6', '\u2030', '\u0160', '\u2039',
			'\u0152', '\u008d', '\u017d', '\u008f',
			'\u0090', '\u2018', '\u2019', '\u201c',
			'\u201d', '\u2022', '\u2013', '\u2014',
			'\u02dc', '\u2122', '\u0161', '\u203a',
			'\u0153', '\u009d', '\u017e', '\u0178',
		}

		switch x {
		case 0x80..<0xa0:
			// Values in this range are remapped through the Windows-1252 table.
			x = windows_1252_replacement_table[x-0x80]
		case 0, 0xd800..=0xdfff:
			// NUL and the surrogate range become the replacement rune.
			x = utf8.RUNE_ERROR
		case:
			if x > 0x10ffff {
				x = utf8.RUNE_ERROR
			}

		}

		b1, w1 := utf8.encode_rune(x)
		w += copy(b[:], b1[:w1])
		return
	}

	// Lookup by entity names

	scan_ident: for j < len(s) { // scan over letters and digits
		c := s[j]
		j += 1

		switch c {
		case 'a'..='z', 'A'..='Z', '0'..='9':
			continue scan_ident
		}
		// Keep the ';' to check for cases which require it and cases which might not
		if c != ';' {
			j -= 1
		}
		break scan_ident
	}

	entity_name := s[1:j]
	if len(entity_name) == 0 {
		return
	}

	if entity_name[len(entity_name)-1] == ';' {
		entity_name = entity_name[:len(entity_name)-1]
	}

	// Exact match on the whole name: everything scanned so far is consumed.
	if r2, _, ok := named_xml_entity_to_rune(entity_name); ok {
		b1, w1 := utf8.encode_rune(r2[0])
		w += copy(b[w:], b1[:w1])
		if r2[1] != 0 {
			b2, w2 := utf8.encode_rune(r2[1])
			w += copy(b[w:], b2[:w2])
		}
		return
	}

	// The longest entities that do not end with a semicolon are <=6 bytes long
	LONGEST_ENTITY_WITHOUT_SEMICOLON :: 6

	// Try progressively shorter prefixes of the name.
	n := min(len(entity_name)-1, LONGEST_ENTITY_WITHOUT_SEMICOLON)
	for i := n; i > 1; i -= 1 {
		if r2, _, ok := named_xml_entity_to_rune(entity_name[:i]); ok {
			b1, w1 := utf8.encode_rune(r2[0])
			w += copy(b[w:], b1[:w1])
			if r2[1] != 0 {
				b2, w2 := utf8.encode_rune(r2[1])
				w += copy(b[w:], b2[:w2])
			}
			// Only the '&' and the matched prefix belong to this reference;
			// the rest of the identifier is ordinary text for the caller.
			j = i + 1
			return
		}
	}

	return
}
|
||||
|
||||
|
||||
// Private XML helper to extract `&<stuff>;` entity.
|
||||
@(private="file")
|
||||
_extract_xml_entity :: proc(t: ^Tokenizer) -> (entity: string, err: Error) {
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user