add encoding/hex and use it to expand the percent decoding chars

2026-07-18 05:31:05 +00:00 · 2023-05-12 19:26:29 +02:00
parent 8693a045bb
commit a381846034
2 changed files with 79 additions and 39 deletions
--- a/core/encoding/hex/hex.odin
+++ b/core/encoding/hex/hex.odin
@@ -0,0 +1,74 @@
+package hex
+
+import "core:strings"
+
+// Encodes `src` as lowercase hexadecimal text.
+//
+// Returns a newly allocated slice of `len(src) * 2` bytes (allocated with
+// `allocator`) holding two hex digits per input byte, high nibble first.
+// The caller owns the returned slice.
+encode :: proc(src: []byte, allocator := context.allocator) -> []byte #no_bounds_check {
+	dst := make([]byte, len(src) * 2, allocator)
+	// `i` walks the input one byte at a time; `j` walks the output two digits
+	// at a time. Keeping them separate is what makes every input byte land in
+	// its own pair of output slots.
+	for i, j := 0, 0; i < len(src); i += 1 {
+		v := src[i]
+		dst[j]   = HEXTABLE[v>>4]
+		dst[j+1] = HEXTABLE[v&0x0f]
+		j += 2
+	}
+
+	return dst
+}
+
+
+// Decodes hexadecimal text in `src` into raw bytes.
+//
+// `src` must hold an even number of hex digits (upper or lower case); each
+// pair becomes one output byte, high nibble first. On success returns a newly
+// allocated slice of `len(src) / 2` bytes (allocated with `allocator`) and
+// `true`. On an odd-length input or a non-hex character returns `nil, false`.
+decode :: proc(src: []byte, allocator := context.allocator) -> (dst: []byte, ok: bool) #no_bounds_check {
+	if len(src) % 2 == 1 {
+		return
+	}
+
+	dst = make([]byte, len(src) / 2, allocator)
+	// `j` points at the low digit of each pair, `i` at the output byte.
+	for i, j := 0, 1; j < len(src); j += 2 {
+		a, a_ok := hex_digit(src[j-1])
+		b, b_ok := hex_digit(src[j])
+		if !a_ok || !b_ok {
+			// Do not hand a half-filled buffer back alongside `ok == false`;
+			// release it so callers that only check `ok` do not leak.
+			delete(dst, allocator)
+			return nil, false
+		}
+
+		dst[i] = (a << 4) | b
+		i += 1
+	}
+
+	return dst, true
+}
+
+// Decodes a one- or two-digit hexadecimal sequence into a single byte.
+// An optional "0x" prefix is accepted, eg: "0x23" -> '#', "23" -> '#'.
+//
+// Returns `0, false` if no digits remain after the prefix, if there are more
+// than two digits (the value would not fit in a byte), or if any character
+// is not a hex digit.
+decode_sequence :: proc(str: string) -> (byte, bool) {
+	digits := strings.trim_prefix(str, "0x")
+	if len(digits) == 0 || len(digits) > 2 {
+		return 0, false
+	}
+
+	val: byte
+	for i := 0; i < len(digits); i += 1 {
+		hd, ok := hex_digit(digits[i])
+		if !ok {
+			return 0, false
+		}
+
+		// Shift the digits seen so far up one nibble and append the new one.
+		val = (val << 4) | hd
+	}
+
+	return val, true
+}
+
+// Lowercase ASCII hex digits, indexed by nibble value (0 through 15).
+@(private)
+HEXTABLE := [16]byte {
+	'0', '1', '2', '3', '4', '5', '6', '7',
+	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
+}
+
+// Converts one ASCII hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its value 0..15.
+// For any other byte the result is `0, false`.
+@(private)
+hex_digit :: proc(char: byte) -> (u8, bool) {
+	if '0' <= char && char <= '9' {
+		return char - '0', true
+	}
+	if 'a' <= char && char <= 'f' {
+		return char - 'a' + 10, true
+	}
+	if 'A' <= char && char <= 'F' {
+		return char - 'A' + 10, true
+	}
+	return 0, false
+}
+
--- a/core/net/url.odin
+++ b/core/net/url.odin
@@ -19,7 +19,7 @@ package net
 import "core:strings"
 import "core:strconv"
 import "core:unicode/utf8"
-import "core:mem"
+import "core:encoding/hex"

 split_url :: proc(url: string, allocator := context.allocator) -> (scheme, host, path: string, queries: map[string]string) {
 	s := url
@@ -121,12 +121,10 @@ percent_decode :: proc(encoded_string: string, allocator := context.allocator) -
 	builder_grow(&b, len(encoded_string))
 	defer if !ok do builder_destroy(&b)

-	stack_buf: [4]u8
-	pending := mem.buffer_from_slice(stack_buf[:])
 	s := encoded_string

 	for len(s) > 0 {
-		i := index_rune(s, '%')
+		i := index_byte(s, '%')
 		if i == -1 {
 			write_string(&b, s) // no '%'s; the string is already decoded
 			break
@@ -139,47 +137,15 @@ percent_decode :: proc(encoded_string: string, allocator := context.allocator) -
 		s = s[1:]

 		if s[0] == '%' {
-			write_rune(&b, '%')
+			write_byte(&b, '%')
 			s = s[1:]
 			continue
 		}

 		if len(s) < 2 do return // percent without encoded value

-		n: int
-		n, _ = strconv.parse_int(s[:2], 16)
-		switch n {
-		case 0x20:  write_rune(&b, ' ')
-		case 0x21:  write_rune(&b, '!')
-		case 0x23:  write_rune(&b, '#')
-		case 0x24:  write_rune(&b, '$')
-		case 0x25:  write_rune(&b, '%')
-		case 0x26:  write_rune(&b, '&')
-		case 0x27:  write_rune(&b, '\'')
-		case 0x28:  write_rune(&b, '(')
-		case 0x29:  write_rune(&b, ')')
-		case 0x2A:  write_rune(&b, '*')
-		case 0x2B:  write_rune(&b, '+')
-		case 0x2C:  write_rune(&b, ',')
-		case 0x2F:  write_rune(&b, '/')
-		case 0x3A:  write_rune(&b, ':')
-		case 0x3B:  write_rune(&b, ';')
-		case 0x3D:  write_rune(&b, '=')
-		case 0x3F:  write_rune(&b, '?')
-		case 0x40:  write_rune(&b, '@')
-		case 0x5B:  write_rune(&b, '[')
-		case 0x5D:  write_rune(&b, ']')
-		case:
-			// utf-8 bytes
-			// TODO(tetra): Audit this - 4 bytes???
-			append(&pending, s[0])
-			append(&pending, s[1])
-			if len(pending) == 4 {
-				r, _ := utf8.decode_rune(pending[:])
-				write_rune(&b, r)
-				clear(&pending)
-			}
-		}
+		val := hex.decode_sequence(s[:2]) or_return
+		write_byte(&b, val)
 		s = s[2:]
 	}