Merge pull request #4641 from zolk3ri/fix/base32-error-handling

encoding/base32: Add RFC-compliant error handling and improve reliability
This commit is contained in:
Jeroen van Rijn
2025-01-04 20:27:16 +01:00
committed by GitHub
2 changed files with 457 additions and 148 deletions

View File

@@ -1,148 +1,230 @@
package encoding_base32
// @note(zh): Encoding utility for Base32
// A secondary param can be used to supply a custom alphabet to
// @link(encode) and a matching decoding table to @link(decode).
// If none is supplied it just uses the standard Base32 alphabet.
// Incase your specific version does not use padding, you may
// truncate it from the encoded output.
ENC_TABLE := [32]byte {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z', '2', '3', '4', '5', '6', '7',
}
PADDING :: '='
DEC_TABLE := [?]u8 {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}
encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> string {
out_length := (len(data) + 4) / 5 * 8
out := make([]byte, out_length)
_encode(out, data)
return string(out)
}
@private
_encode :: proc(out, data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) {
out := out
data := data
for len(data) > 0 {
carry: byte
switch len(data) {
case:
out[7] = ENC_TABLE[data[4] & 0x1f]
carry = data[4] >> 5
fallthrough
case 4:
out[6] = ENC_TABLE[carry | (data[3] << 3) & 0x1f]
out[5] = ENC_TABLE[(data[3] >> 2) & 0x1f]
carry = data[3] >> 7
fallthrough
case 3:
out[4] = ENC_TABLE[carry | (data[2] << 1) & 0x1f]
carry = (data[2] >> 4) & 0x1f
fallthrough
case 2:
out[3] = ENC_TABLE[carry | (data[1] << 4) & 0x1f]
out[2] = ENC_TABLE[(data[1] >> 1) & 0x1f]
carry = (data[1] >> 6) & 0x1f
fallthrough
case 1:
out[1] = ENC_TABLE[carry | (data[0] << 2) & 0x1f]
out[0] = ENC_TABLE[data[0] >> 3]
}
if len(data) < 5 {
out[7] = byte(PADDING)
if len(data) < 4 {
out[6] = byte(PADDING)
out[5] = byte(PADDING)
if len(data) < 3 {
out[4] = byte(PADDING)
if len(data) < 2 {
out[3] = byte(PADDING)
out[2] = byte(PADDING)
}
}
}
break
}
data = data[5:]
out = out[8:]
}
}
decode :: proc(data: string, DEC_TBL := DEC_TABLE, allocator := context.allocator) -> []byte #no_bounds_check{
if len(data) == 0 {
return nil
}
outi := 0
data := data
out := make([]byte, len(data) / 8 * 5, allocator)
end := false
for len(data) > 0 && !end {
dbuf : [8]byte
dlen := 8
for j := 0; j < 8; {
if len(data) == 0 {
dlen, end = j, true
break
}
input := data[0]
data = data[1:]
if input == byte(PADDING) && j >= 2 && len(data) < 8 {
assert(!(len(data) + j < 8 - 1), "Corrupted input")
for k := 0; k < 8-1-j; k +=1 {
assert(len(data) < k || data[k] == byte(PADDING), "Corrupted input")
}
dlen, end = j, true
assert(dlen != 1 && dlen != 3 && dlen != 6, "Corrupted input")
break
}
dbuf[j] = DEC_TABLE[input]
assert(dbuf[j] != 0xff, "Corrupted input")
j += 1
}
switch dlen {
case 8:
out[outi + 4] = dbuf[6] << 5 | dbuf[7]
fallthrough
case 7:
out[outi + 3] = dbuf[4] << 7 | dbuf[5] << 2 | dbuf[6] >> 3
fallthrough
case 5:
out[outi + 2] = dbuf[3] << 4 | dbuf[4] >> 1
fallthrough
case 4:
out[outi + 1] = dbuf[1] << 6 | dbuf[2] << 1 | dbuf[3] >> 4
fallthrough
case 2:
out[outi + 0] = dbuf[0] << 3 | dbuf[1] >> 2
}
outi += 5
}
return out
}
// Base32 encoding/decoding implementation as specified in RFC 4648.
// [[ More; https://www.rfc-editor.org/rfc/rfc4648.html ]]
package encoding_base32
// @note(zh): Encoding utility for Base32
// A secondary param can be used to supply a custom alphabet to
// @link(encode) and a matching decoding table to @link(decode).
// If none is supplied it just uses the standard Base32 alphabet.
// In case your specific version does not use padding, you may
// truncate it from the encoded output.
// Error represents errors that can occur during base32 decoding operations.
// As per RFC 4648:
// - Section 3.3: Invalid character handling
// - Section 3.2: Padding requirements
// - Section 6: Base32 encoding specifics (including block size requirements)
Error :: enum {
None,
Invalid_Character, // Input contains characters outside the specified alphabet
Invalid_Length, // Input length is not valid for base32 (must be a multiple of 8 with proper padding)
Malformed_Input, // Input has improper structure (wrong padding position or incomplete groups)
}
Validate_Proc :: #type proc(c: byte) -> bool
@private
_validate_default :: proc(c: byte) -> bool {
return (c >= 'A' && c <= 'Z') || (c >= '2' && c <= '7')
}
@(rodata)
ENC_TABLE := [32]byte {
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
'Y', 'Z', '2', '3', '4', '5', '6', '7',
}
PADDING :: '='
@(rodata)
DEC_TABLE := [256]u8 {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}
encode :: proc(data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) -> string {
out_length := (len(data) + 4) / 5 * 8
out := make([]byte, out_length, allocator)
_encode(out, data, ENC_TBL)
return string(out[:])
}
@private
_encode :: proc(out, data: []byte, ENC_TBL := ENC_TABLE, allocator := context.allocator) {
out := out
data := data
for len(data) > 0 {
carry: byte
switch len(data) {
case:
out[7] = ENC_TBL[data[4] & 0x1f]
carry = data[4] >> 5
fallthrough
case 4:
out[6] = ENC_TBL[carry | (data[3] << 3) & 0x1f]
out[5] = ENC_TBL[(data[3] >> 2) & 0x1f]
carry = data[3] >> 7
fallthrough
case 3:
out[4] = ENC_TBL[carry | (data[2] << 1) & 0x1f]
carry = (data[2] >> 4) & 0x1f
fallthrough
case 2:
out[3] = ENC_TBL[carry | (data[1] << 4) & 0x1f]
out[2] = ENC_TBL[(data[1] >> 1) & 0x1f]
carry = (data[1] >> 6) & 0x1f
fallthrough
case 1:
out[1] = ENC_TBL[carry | (data[0] << 2) & 0x1f]
out[0] = ENC_TBL[data[0] >> 3]
}
if len(data) < 5 {
out[7] = byte(PADDING)
if len(data) < 4 {
out[6] = byte(PADDING)
out[5] = byte(PADDING)
if len(data) < 3 {
out[4] = byte(PADDING)
if len(data) < 2 {
out[3] = byte(PADDING)
out[2] = byte(PADDING)
}
}
}
break
}
data = data[5:]
out = out[8:]
}
}
@(optimization_mode="favor_size")
decode :: proc(
data: string,
DEC_TBL := DEC_TABLE,
validate: Validate_Proc = _validate_default,
allocator := context.allocator) -> (out: []byte, err: Error) {
if len(data) == 0 {
return nil, .None
}
// Check minimum length requirement first
if len(data) < 2 {
return nil, .Invalid_Length
}
// Validate characters using provided validation function
for i := 0; i < len(data); i += 1 {
c := data[i]
if c == byte(PADDING) {
break
}
if !validate(c) {
return nil, .Invalid_Character
}
}
// Validate padding and length
data_len := len(data)
padding_count := 0
for i := data_len - 1; i >= 0; i -= 1 {
if data[i] != byte(PADDING) {
break
}
padding_count += 1
}
// Check for proper padding and length combinations
if padding_count > 0 {
// Verify no padding in the middle
for i := 0; i < data_len - padding_count; i += 1 {
if data[i] == byte(PADDING) {
return nil, .Malformed_Input
}
}
content_len := data_len - padding_count
mod8 := content_len % 8
required_padding: int
switch mod8 {
case 2: required_padding = 6 // 2 chars need 6 padding chars
case 4: required_padding = 4 // 4 chars need 4 padding chars
case 5: required_padding = 3 // 5 chars need 3 padding chars
case 7: required_padding = 1 // 7 chars need 1 padding char
case: required_padding = 0
}
if required_padding > 0 {
if padding_count != required_padding {
return nil, .Malformed_Input
}
} else if mod8 != 0 {
return nil, .Malformed_Input
}
} else {
// No padding - must be multiple of 8
if data_len % 8 != 0 {
return nil, .Malformed_Input
}
}
// Calculate decoded length: 5 bytes for every 8 input chars
input_chars := data_len - padding_count
out_len := input_chars * 5 / 8
out = make([]byte, out_len, allocator)
defer if err != .None {
delete(out)
}
// Process input in 8-byte blocks
outi := 0
for i := 0; i < input_chars; i += 8 {
buf: [8]byte
block_size := min(8, input_chars - i)
// Decode block
for j := 0; j < block_size; j += 1 {
buf[j] = DEC_TBL[data[i + j]]
}
// Convert to output bytes based on block size
bytes_to_write := block_size * 5 / 8
switch block_size {
case 8:
out[outi + 4] = (buf[6] << 5) | buf[7]
fallthrough
case 7:
out[outi + 3] = (buf[4] << 7) | (buf[5] << 2) | (buf[6] >> 3)
fallthrough
case 5:
out[outi + 2] = (buf[3] << 4) | (buf[4] >> 1)
fallthrough
case 4:
out[outi + 1] = (buf[1] << 6) | (buf[2] << 1) | (buf[3] >> 4)
fallthrough
case 2:
out[outi] = (buf[0] << 3) | (buf[1] >> 2)
}
outi += bytes_to_write
}
return
}

View File

@@ -0,0 +1,227 @@
package encoding_base32
import "core:testing"
import "core:bytes"
@(test)
test_base32_decode_valid :: proc(t: ^testing.T) {
// RFC 4648 Section 10 - Test vectors
cases := [?]struct {
input, expected: string,
}{
{"", ""},
{"MY======", "f"},
{"MZXQ====", "fo"},
{"MZXW6===", "foo"},
{"MZXW6YQ=", "foob"},
{"MZXW6YTB", "fooba"},
{"MZXW6YTBOI======", "foobar"},
}
for c in cases {
output, err := decode(c.input)
if output != nil {
defer delete(output)
}
testing.expect_value(t, err, Error.None)
expected := transmute([]u8)c.expected
if output != nil {
testing.expect(t, bytes.equal(output, expected))
} else {
testing.expect(t, len(c.expected) == 0)
}
}
}
@(test)
test_base32_encode :: proc(t: ^testing.T) {
// RFC 4648 Section 10 - Test vectors
cases := [?]struct {
input, expected: string,
}{
{"", ""},
{"f", "MY======"},
{"fo", "MZXQ===="},
{"foo", "MZXW6==="},
{"foob", "MZXW6YQ="},
{"fooba", "MZXW6YTB"},
{"foobar", "MZXW6YTBOI======"},
}
for c in cases {
output := encode(transmute([]byte)c.input)
defer delete(output)
testing.expect(t, output == c.expected)
}
}
@(test)
test_base32_decode_invalid :: proc(t: ^testing.T) {
// Section 3.3 - Non-alphabet characters
{
// Characters outside alphabet
input := "MZ1W6YTB" // '1' not in alphabet (A-Z, 2-7)
output, err := decode(input)
if output != nil {
defer delete(output)
}
testing.expect_value(t, err, Error.Invalid_Character)
}
{
// Lowercase not allowed
input := "mzxq===="
output, err := decode(input)
if output != nil {
defer delete(output)
}
testing.expect_value(t, err, Error.Invalid_Character)
}
// Section 3.2 - Padding requirements
{
// Padding must only be at end
input := "MZ=Q===="
output, err := decode(input)
if output != nil {
defer delete(output)
}
testing.expect_value(t, err, Error.Malformed_Input)
}
{
// Missing padding
input := "MZXQ" // Should be MZXQ====
output, err := decode(input)
if output != nil {
defer delete(output)
}
testing.expect_value(t, err, Error.Malformed_Input)
}
{
// Incorrect padding length
input := "MZXQ=" // Needs 4 padding chars
output, err := decode(input)
if output != nil {
defer delete(output)
}
testing.expect_value(t, err, Error.Malformed_Input)
}
{
// Too much padding
input := "MY=========" // Extra padding chars
output, err := decode(input)
if output != nil {
defer delete(output)
}
testing.expect_value(t, err, Error.Malformed_Input)
}
// Section 6 - Base32 block size requirements
{
// Single character (invalid block)
input := "M"
output, err := decode(input)
if output != nil {
defer delete(output)
}
testing.expect_value(t, err, Error.Invalid_Length)
}
}
@(test)
test_base32_roundtrip :: proc(t: ^testing.T) {
cases := [?]string{
"",
"f",
"fo",
"foo",
"foob",
"fooba",
"foobar",
}
for input in cases {
encoded := encode(transmute([]byte)input)
defer delete(encoded)
decoded, err := decode(encoded)
if decoded != nil {
defer delete(decoded)
}
testing.expect_value(t, err, Error.None)
testing.expect(t, bytes.equal(decoded, transmute([]byte)input))
}
}
@(test)
test_base32_custom_alphabet :: proc(t: ^testing.T) {
custom_enc_table := [32]byte{
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
}
custom_dec_table: [256]u8
for i := 0; i < len(custom_enc_table); i += 1 {
custom_dec_table[custom_enc_table[i]] = u8(i)
}
/*
custom_dec_table := [256]u8{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00-0x0f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x20-0x2f
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, // 0x30-0x3f ('0'-'9')
0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 0x40-0x4f ('A'-'O')
25, 26, 27, 28, 29, 30, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x50-0x5f ('P'-'V')
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60-0x6f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x70-0x7f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80-0x8f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90-0x9f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0-0xaf
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0-0xbf
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xc0-0xcf
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xd0-0xdf
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xe0-0xef
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0-0xff
}
*/
custom_validate :: proc(c: byte) -> bool {
return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'V') || c == byte(PADDING)
}
cases := [?]struct {
input: string,
enc_expected: string,
}{
{"f", "CO======"},
{"fo", "CPNG===="},
{"foo", "CPNMU==="},
}
for c in cases {
// Test encoding
encoded := encode(transmute([]byte)c.input, custom_enc_table)
defer delete(encoded)
testing.expect(t, encoded == c.enc_expected)
// Test decoding
decoded, err := decode(encoded, custom_dec_table, custom_validate)
defer if decoded != nil {
delete(decoded)
}
testing.expect_value(t, err, Error.None)
testing.expect(t, bytes.equal(decoded, transmute([]byte)c.input))
}
// Test invalid character detection
{
input := "WXY=====" // Contains chars not in our alphabet
output, err := decode(input, custom_dec_table, custom_validate)
if output != nil {
delete(output)
}
testing.expect_value(t, err, Error.Invalid_Character)
}
}