From a38184603411e3ce5a22040196b8b3e482c0987e Mon Sep 17 00:00:00 2001
From: Laytan Laats
Date: Fri, 12 May 2023 19:26:29 +0200
Subject: [PATCH] add encoding/hex and use it to expand the percent decoding
 chars

---
 core/encoding/hex/hex.odin | 88 ++++++++++++++++++++++++++++++++++++++
 core/net/url.odin          | 44 +++--------------------
 2 files changed, 93 insertions(+), 39 deletions(-)
 create mode 100644 core/encoding/hex/hex.odin

diff --git a/core/encoding/hex/hex.odin b/core/encoding/hex/hex.odin
new file mode 100644
index 000000000..f66febbff
--- /dev/null
+++ b/core/encoding/hex/hex.odin
@@ -0,0 +1,88 @@
+package hex
+
+import "core:strings"
+
+// Encodes `src` as lowercase hexadecimal text, two output bytes per input byte.
+// The result holds `len(src) * 2` bytes and is allocated with `allocator`.
+encode :: proc(src: []byte, allocator := context.allocator) -> []byte #no_bounds_check {
+	dst := make([]byte, len(src) * 2, allocator)
+	// `j` is the write position in `dst`; it advances two bytes per source byte.
+	for i, j := 0, 0; i < len(src); i += 1 {
+		v := src[i]
+		dst[j]   = HEXTABLE[v>>4]
+		dst[j+1] = HEXTABLE[v&0x0f]
+		j += 2
+	}
+
+	return dst
+}
+
+// Decodes hexadecimal text (upper or lower case) in `src` into raw bytes.
+// `ok` is false when `src` has an odd length or contains a non-hex character.
+// NOTE(review): on a non-hex character `or_return` appears to hand back the
+// already allocated `dst` together with `ok == false` - confirm, and free it.
+decode :: proc(src: []byte, allocator := context.allocator) -> (dst: []byte, ok: bool) #no_bounds_check {
+	if len(src) % 2 == 1 {
+		return
+	}
+
+	dst = make([]byte, len(src) / 2, allocator)
+	for i, j := 0, 1; j < len(src); j += 2 {
+		p := src[j-1]
+		q := src[j]
+
+		a := hex_digit(p) or_return
+		b := hex_digit(q) or_return
+
+		dst[i] = (a << 4) | b
+		i += 1
+	}
+
+	return dst, true
+}
+
+// Decodes a one- or two-digit hexadecimal sequence into one byte,
+// eg: "0x23" or "23" -> '#'. A leading "0x" prefix is optional.
+// Returns false when the sequence is empty, has more than two digits
+// (which cannot fit in a byte), or contains a non-hexadecimal character.
+decode_sequence :: proc(str: string) -> (byte, bool) {
+	no_prefix_str := strings.trim_prefix(str, "0x")
+	if len(no_prefix_str) == 0 || len(no_prefix_str) > 2 {
+		return 0, false
+	}
+
+	val: byte
+	for i := 0; i < len(no_prefix_str); i += 1 {
+		hd, ok := hex_digit(no_prefix_str[i])
+		if !ok {
+			return 0, false
+		}
+
+		// Shift the digits read so far up one nibble and append the new one.
+		val = (val << 4) | hd
+	}
+
+	return val, true
+}
+
+// Lowercase hexadecimal digits, indexed by nibble value.
+@(private)
+HEXTABLE := [16]byte {
+	'0', '1', '2', '3',
+	'4', '5', '6', '7',
+	'8', '9', 'a', 'b',
+	'c', 'd', 'e', 'f',
+}
+
+// Returns the value (0-15) of one hexadecimal digit character, accepting
+// both cases; the second result is false for any other character.
+@(private)
+hex_digit :: proc(char: byte) -> (u8, bool) {
+	switch char {
+	case '0' ..= '9': return char - '0', true
+	case 'a' ..= 'f': return char - 'a' + 10, true
+	case 'A' ..= 'F': return char - 'A' + 10, true
+	case:             return 0, false
+	}
+}
+
diff --git a/core/net/url.odin b/core/net/url.odin
index 460bb70ad..fe081685d 100644
--- a/core/net/url.odin
+++ b/core/net/url.odin
@@ -19,7 +19,7 @@ package net
 import "core:strings"
 import "core:strconv"
 import "core:unicode/utf8"
-import "core:mem"
+import "core:encoding/hex"
 
 split_url :: proc(url: string, allocator := context.allocator) -> (scheme, host, path: string, queries: map[string]string) {
 	s := url
@@ -121,12 +121,10 @@ percent_decode :: proc(encoded_string: string, allocator := context.allocator) -
 	builder_grow(&b, len(encoded_string))
 	defer if !ok do builder_destroy(&b)
 
-	stack_buf: [4]u8
-	pending := mem.buffer_from_slice(stack_buf[:])
 	s := encoded_string
 
 	for len(s) > 0 {
-		i := index_rune(s, '%')
+		i := index_byte(s, '%')
 		if i == -1 {
 			write_string(&b, s) // no '%'s; the string is already decoded
 			break
@@ -139,47 +137,15 @@ percent_decode :: proc(encoded_string: string, allocator := context.allocator) -
 		s = s[1:]
 
 		if s[0] == '%' {
-			write_rune(&b, '%')
+			write_byte(&b, '%')
 			s = s[1:]
 			continue
 		}
 
 		if len(s) < 2 do return // percent without encoded value
 
-		n: int
-		n, _ = strconv.parse_int(s[:2], 16)
-		switch n {
-		case 0x20: write_rune(&b, ' ')
-		case 0x21: write_rune(&b, '!')
-		case 0x23: write_rune(&b, '#')
-		case 0x24: write_rune(&b, '$')
-		case 0x25: write_rune(&b, '%')
-		case 0x26: write_rune(&b, '&')
-		case 0x27: write_rune(&b, '\'')
-		case 0x28: write_rune(&b, '(')
-		case 0x29: write_rune(&b, ')')
-		case 0x2A: write_rune(&b, '*')
-		case 0x2B: write_rune(&b, '+')
-		case 0x2C: write_rune(&b, ',')
-		case 0x2F: write_rune(&b, '/')
-		case 0x3A: write_rune(&b, ':')
-		case 0x3B: write_rune(&b, ';')
-		case 0x3D: write_rune(&b, '=')
-		case 0x3F: write_rune(&b, '?')
-		case 0x40: write_rune(&b, '@')
-		case 0x5B: write_rune(&b, '[')
-		case 0x5D: write_rune(&b, ']')
-		case:
-			// utf-8 bytes
-			// TODO(tetra): Audit this - 4 bytes???
-			append(&pending, s[0])
-			append(&pending, s[1])
-			if len(pending) == 4 {
-				r, _ := utf8.decode_rune(pending[:])
-				write_rune(&b, r)
-				clear(&pending)
-			}
-		}
+		val := hex.decode_sequence(s[:2]) or_return
+		write_byte(&b, val)
 
 		s = s[2:]
 	}