From e799476f90537ca173205a64da1fbf87b894b42e Mon Sep 17 00:00:00 2001 From: Jeroen van Rijn Date: Fri, 22 Apr 2022 16:55:47 +0200 Subject: [PATCH] [compress/shoco] Add short string compressor. --- core/compress/shoco/model.odin | 148 +++++++++ core/compress/shoco/shoco.odin | 318 ++++++++++++++++++++ examples/all/all_main.odin | 2 + tests/core/assets/Shoco/LICENSE | 26 ++ tests/core/assets/Shoco/LICENSE.shoco | Bin 0 -> 1269 bytes tests/core/assets/Shoco/README.md | 95 ++++++ tests/core/assets/Shoco/README.md.shoco | Bin 0 -> 2227 bytes tests/core/compress/test_core_compress.odin | 57 +++- 8 files changed, 645 insertions(+), 1 deletion(-) create mode 100644 core/compress/shoco/model.odin create mode 100644 core/compress/shoco/shoco.odin create mode 100644 tests/core/assets/Shoco/LICENSE create mode 100644 tests/core/assets/Shoco/LICENSE.shoco create mode 100644 tests/core/assets/Shoco/README.md create mode 100644 tests/core/assets/Shoco/README.md.shoco diff --git a/core/compress/shoco/model.odin b/core/compress/shoco/model.odin new file mode 100644 index 000000000..49e3dd97e --- /dev/null +++ b/core/compress/shoco/model.odin @@ -0,0 +1,148 @@ +/* + This file was generated, so don't edit this by hand. + Transliterated from https://github.com/Ed-von-Schleck/shoco/blob/master/shoco_model.h, + which is an English word model. +*/ + +// package shoco is an implementation of the shoco short string compressor +package shoco + +DEFAULT_MODEL :: Shoco_Model { + min_char = 39, + max_char = 122, + characters_by_id = { + 'e', 'a', 'i', 'o', 't', 'h', 'n', 'r', 's', 'l', 'u', 'c', 'w', 'm', 'd', 'b', 'p', 'f', 'g', 'v', 'y', 'k', '-', 'H', 'M', 'T', '\'', 'B', 'x', 'I', 'W', 'L', + }, + ids_by_character = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 26, -1, -1, -1, -1, -1, 22, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 27, -1, -1, -1, -1, -1, 23, 29, -1, -1, 31, 24, -1, -1, -1, -1, -1, -1, 25, -1, -1, 30, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 15, 11, 14, 0, 17, 18, 5, 2, -1, 21, 9, 13, 6, 3, 16, -1, 7, 8, 4, 10, 19, 12, 28, 20, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + }, + successors_by_bigram = { + {7, 4, 12, -1, 6, -1, 1, 0, 3, 5, -1, 9, -1, 8, 2, -1, 15, 14, -1, 10, 11, -1, -1, -1, -1, -1, -1, -1, 13, -1, -1, -1}, + {-1, -1, 6, -1, 1, -1, 0, 3, 2, 4, 15, 11, -1, 9, 5, 10, 13, -1, 12, 8, 7, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {9, 11, -1, 4, 2, -1, 0, 8, 1, 5, -1, 6, -1, 3, 7, 15, -1, 12, 10, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, 14, 7, 5, -1, 1, 2, 8, 9, 0, 15, 6, 4, 11, -1, 12, 3, -1, 10, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 4, 3, 1, 5, 0, -1, 6, 10, 9, 7, 12, 11, -1, -1, -1, -1, 13, -1, -1, 8, -1, 15, -1, -1, -1, 14, -1, -1, -1, -1, -1}, + {0, 1, 2, 3, 4, -1, -1, 5, 9, 10, 6, -1, -1, 8, 15, 11, -1, 14, -1, -1, 7, -1, 13, -1, -1, -1, 12, -1, -1, -1, -1, -1}, + {2, 8, 7, 4, 3, -1, 9, -1, 6, 11, -1, 5, -1, -1, 0, -1, -1, 14, 1, 15, 10, 12, -1, -1, -1, -1, 13, -1, -1, -1, -1, -1}, + {0, 3, 1, 2, 6, -1, 9, 8, 4, 12, 13, 10, -1, 11, 7, -1, -1, 15, 14, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 6, 3, 4, 1, 2, -1, -1, 5, 10, 7, 9, 11, 12, -1, -1, 8, 14, -1, -1, 15, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 6, 2, 5, 9, -1, -1, -1, 10, 1, 8, -1, 12, 14, 4, -1, 15, 7, -1, 13, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {8, 10, 9, 15, 1, -1, 4, 0, 3, 2, -1, 6, -1, 12, 11, 13, 7, 14, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 3, 6, 0, 4, 2, -1, 7, 13, 8, 9, 11, -1, -1, 15, -1, -1, -1, -1, -1, 10, 5, 14, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {3, 0, 1, 4, -1, 2, 5, 6, 7, 8, -1, 14, -1, -1, 9, 15, -1, 12, -1, -1, -1, 10, 11, -1, -1, -1, 13, -1, -1, -1, -1, -1}, + {0, 1, 3, 2, 15, -1, 12, -1, 7, 14, 4, -1, -1, 9, -1, 8, 5, 10, -1, -1, 6, -1, 13, -1, -1, -1, 11, -1, -1, -1, -1, -1}, + {0, 3, 1, 2, -1, -1, 12, 6, 4, 9, 7, -1, -1, 14, 8, -1, -1, 15, 11, 13, 5, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 5, 7, 2, 10, 13, -1, 6, 8, 1, 3, -1, -1, 14, 15, 11, -1, -1, -1, 12, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 2, 6, 3, 7, 10, -1, 1, 9, 4, 8, -1, -1, 15, -1, 12, 5, -1, -1, -1, 11, -1, 13, -1, -1, -1, 14, -1, -1, -1, -1, -1}, + {1, 3, 4, 0, 7, -1, 12, 2, 11, 8, 6, 13, -1, -1, -1, -1, -1, 5, -1, -1, 10, 15, 9, -1, -1, -1, 14, -1, -1, -1, -1, -1}, + {1, 3, 5, 2, 13, 0, 9, 4, 7, 6, 8, -1, -1, 15, -1, 11, -1, -1, 10, -1, 14, -1, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 2, 1, 3, -1, -1, -1, 6, -1, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {1, 11, 4, 0, 3, -1, 13, 12, 2, 7, -1, -1, 15, 10, 5, 8, 14, -1, -1, -1, -1, -1, 9, -1, -1, -1, 6, -1, -1, -1, -1, -1}, + {0, 9, 2, 14, 15, 4, 1, 13, 3, 5, -1, -1, 10, -1, -1, -1, -1, 6, 12, -1, 7, -1, 8, -1, -1, -1, 11, -1, -1, -1, -1, -1}, + {-1, 2, 14, -1, 1, 5, 8, 7, 4, 12, -1, 6, 9, 11, 13, 3, 10, 15, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {0, 1, 3, 2, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {4, 3, 1, 5, -1, -1, -1, 0, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {2, 8, 4, 1, -1, 0, -1, 6, -1, -1, 5, -1, 7, -1, -1, -1, -1, -1, -1, -1, 10, -1, -1, 9, -1, -1, -1, -1, -1, -1, -1, -1}, + {12, 5, -1, -1, 1, -1, -1, 7, 0, 3, -1, 2, -1, 4, 6, -1, -1, -1, -1, 8, -1, -1, 15, -1, 13, 9, -1, -1, -1, -1, -1, 11}, + {1, 3, 2, 4, -1, -1, -1, 5, -1, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1, -1, -1, -1, -1, -1, -1, 8, -1, -1}, + {5, 3, 4, 12, 1, 6, -1, -1, -1, -1, 8, 2, -1, -1, -1, -1, 0, 9, -1, -1, 11, -1, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, 0, -1, 1, 12, 3, -1, -1, -1, -1, 5, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, 4, -1, -1, 6, -1, 10}, + {2, 3, 1, 4, -1, 0, -1, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, -1, -1, -1, -1, -1, -1, -1, -1, 6, -1, -1}, + {5, 1, 3, 0, -1, -1, -1, -1, -1, -1, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, 2, -1, -1, -1, -1, -1, 9, -1, -1, 6, -1, 7}, + }, + successors_reversed = { + {'s', 't', 'c', 'l', 'm', 'a', 'd', 'r', 'v', 'T', 'A', 'L', 'e', 'M', 'Y', '-'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'-', 't', 'a', 'b', 's', 'h', 'c', 'r', 'n', 'w', 'p', 'm', 'l', 'd', 'i', 'f'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'u', 'e', 'i', 'a', 'o', 'r', 'y', 'l', 'I', 'E', 'R', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'e', 'a', 'o', 'i', 'u', 'A', 'y', 'E', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'t', 'n', 'f', 's', '\'', 'm', 'I', 'N', 'A', 'E', 'L', 'Z', 'r', 'V', 'R', 'C'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'o', 'a', 'y', 'i', 'u', 'e', 'I', 'L', 'D', '\'', 'E', 'Y', '\x00', '\x00', '\x00', '\x00'}, + {'r', 'i', 'y', 'a', 'e', 'o', 'u', 'Y', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'h', 'o', 'e', 'E', 'i', 'u', 'r', 'w', 'a', 'H', 'y', 'R', 'Z', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'h', 'i', 'e', 'a', 'o', 'r', 'I', 'y', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'n', 't', 's', 'r', 'l', 'd', 'i', 'y', 'v', 'm', 'b', 'c', 'g', 'p', 'k', 'u'}, + {'e', 'l', 'o', 'u', 'y', 'a', 'r', 'i', 's', 'j', 't', 'b', 'v', 'h', 'm', 'd'}, + {'o', 'e', 'h', 'a', 't', 'k', 'i', 'r', 'l', 'u', 'y', 'c', 'q', 's', '-', 'd'}, + {'e', 'i', 'o', 'a', 's', 'y', 'r', 'u', 'd', 'l', '-', 'g', 'n', 'v', 'm', 'f'}, + {'r', 'n', 'd', 's', 'a', 'l', 't', 'e', 'm', 'c', 'v', 'y', 'i', 'x', 'f', 'p'}, + {'o', 'e', 'r', 'a', 'i', 'f', 'u', 't', 'l', '-', 'y', 's', 'n', 'c', '\'', 'k'}, + {'h', 'e', 'o', 'a', 'r', 'i', 'l', 's', 'u', 'n', 'g', 'b', '-', 't', 'y', 'm'}, + {'e', 'a', 'i', 'o', 't', 'r', 'u', 'y', 'm', 's', 'l', 'b', '\'', '-', 'f', 'd'}, + {'n', 's', 't', 'm', 'o', 'l', 'c', 'd', 'r', 'e', 'g', 'a', 'f', 'v', 'z', 'b'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'e', 'n', 'i', 's', 'h', 'l', 'f', 'y', '-', 'a', 'w', '\'', 'g', 'r', 'o', 't'}, + {'e', 'l', 'i', 'y', 'd', 'o', 'a', 'f', 'u', 't', 's', 'k', 'w', 'v', 'm', 'p'}, + {'e', 'a', 'o', 'i', 'u', 'p', 'y', 's', 'b', 'm', 'f', '\'', 'n', '-', 'l', 't'}, + {'d', 'g', 'e', 't', 'o', 'c', 's', 'i', 'a', 'n', 'y', 'l', 'k', '\'', 'f', 'v'}, + {'u', 'n', 'r', 'f', 'm', 't', 'w', 'o', 's', 'l', 'v', 'd', 'p', 'k', 'i', 'c'}, + {'e', 'r', 'a', 'o', 'l', 'p', 'i', 't', 'u', 's', 'h', 'y', 'b', '-', '\'', 'm'}, + {'\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'e', 'i', 'o', 'a', 's', 'y', 't', 'd', 'r', 'n', 'c', 'm', 'l', 'u', 'g', 'f'}, + {'e', 't', 'h', 'i', 'o', 's', 'a', 'u', 'p', 'c', 'l', 'w', 'm', 'k', 'f', 'y'}, + {'h', 'o', 'e', 'i', 'a', 't', 'r', 'u', 'y', 'l', 's', 'w', 'c', 'f', '\'', '-'}, + {'r', 't', 'l', 's', 'n', 'g', 'c', 'p', 'e', 'i', 'a', 'd', 'm', 'b', 'f', 'o'}, + {'e', 'i', 'a', 'o', 'y', 'u', 'r', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00', '\x00'}, + {'a', 'i', 'h', 'e', 'o', 'n', 'r', 's', 'l', 'd', 'k', '-', 'f', '\'', 'c', 'b'}, + {'p', 't', 'c', 'a', 'i', 'e', 'h', 'q', 'u', 'f', '-', 'y', 'o', '\x00', '\x00', '\x00'}, + {'o', 'e', 's', 't', 'i', 'd', '\'', 'l', 'b', '-', 'm', 'a', 'r', 'n', 'p', 'w'}, + }, + + character_count = 32, + successor_count = 16, + + max_successor_n = 7, + packs = { + { 0x80000000, 1, 2, { 26, 24, 24, 24, 24, 24, 24, 24 }, { 15, 3, 0, 0, 0, 0, 0, 0 }, 0xc0, 0x80 }, + { 0xc0000000, 2, 4, { 25, 22, 19, 16, 16, 16, 16, 16 }, { 15, 7, 7, 7, 0, 0, 0, 0 }, 0xe0, 0xc0 }, + { 0xe0000000, 4, 8, { 23, 19, 15, 11, 8, 5, 2, 0 }, { 31, 15, 15, 15, 7, 7, 7, 3 }, 0xf0, 0xe0 }, + }, +} \ No newline at end of file diff --git a/core/compress/shoco/shoco.odin b/core/compress/shoco/shoco.odin new file mode 100644 index 000000000..9c5008f5d --- /dev/null +++ b/core/compress/shoco/shoco.odin @@ -0,0 +1,318 @@ +/* + Copyright 2022 Jeroen van Rijn . + Made available under Odin's BSD-3 license. + + List of contributors: + Jeroen van Rijn: Initial implementation. + + An implementation of [shoco](https://github.com/Ed-von-Schleck/shoco) by Christian Schramm. +*/ + +// package shoco is an implementation of the shoco short string compressor +package shoco + +import "core:intrinsics" +import "core:compress" + +Shoco_Pack :: struct { + word: u32, + bytes_packed: i8, + bytes_unpacked: i8, + offsets: [8]u16, + masks: [8]i16, + header_mask: u8, + header: u8, +} + +Shoco_Model :: struct { + min_char: u8, + max_char: u8, + characters_by_id: []u8, + ids_by_character: [256]i16, + successors_by_bigram: [][]i8, + successors_reversed: [][]u8, + + character_count: u8, + successor_count: u8, + max_successor_n: i8, + packs: []Shoco_Pack, +} + +compress_bound :: proc(uncompressed_size: int) -> (worst_case_compressed_size: int) { + // Worst case compression happens when input is non-ASCII (128-255) + // Encoded as 0x00 + the byte in question. + return uncompressed_size * 2 +} + +decompress_bound :: proc(compressed_size: int, model := DEFAULT_MODEL) -> (maximum_decompressed_size: int) { + // Best case compression is 2:1 + most: f64 + for pack in model.packs { + val := f64(compressed_size) / f64(pack.bytes_packed) * f64(pack.bytes_unpacked) + most = max(most, val) + } + return int(most) +} + +find_best_encoding :: proc(indices: []i16, n_consecutive: i8, model := DEFAULT_MODEL) -> (res: int) { + for p := len(model.packs); p > 0; p -= 1 { + pack := model.packs[p - 1] + if n_consecutive >= pack.bytes_unpacked { + have_index := true + for i := 0; i < int(pack.bytes_unpacked); i += 1 { + if indices[i] > pack.masks[i] { + have_index = false + break + } + } + if have_index { + return p - 1 + } + } + } + return -1 +} + +validate_model :: proc(model: Shoco_Model) -> (int, compress.Error) { + if len(model.successors_reversed) != int(model.max_char - model.min_char) { + return 0, .Unknown_Compression_Method + } + + if len(model.characters_by_id) != int(model.character_count) { + return 0, .Unknown_Compression_Method + } + + if len(model.successors_by_bigram) != int(model.character_count) || len(model.successors_by_bigram[0]) != int(model.character_count) { + return 0, .Unknown_Compression_Method + } + + if len(model.successors_reversed[0]) != int(model.successor_count) { + return 0, .Unknown_Compression_Method + } + + // Model seems legit. + return 0, nil +} + +// Decompresses into provided buffer. +decompress_slice_to_output_buffer :: proc(input: []u8, output: []u8, model := DEFAULT_MODEL) -> (size: int, err: compress.Error) { + inp, inp_end := 0, len(input) + out, out_end := 0, len(output) + + validate_model(model) or_return + + for inp < inp_end { + val := transmute(i8)input[inp] + mark := int(-1) + + for val < 0 { + val <<= 1 + mark += 1 + } + + if mark > len(model.packs) { + return out, .Unknown_Compression_Method + } + + if mark < 0 { + if out >= out_end { + return out, .Output_Too_Short + } + + // Ignore the sentinel value for non-ASCII chars + if input[inp] == 0x00 { + inp += 1 + if inp >= inp_end { + return out, .Stream_Too_Short + } + } + output[out] = input[inp] + inp, out = inp + 1, out + 1 + + } else { + pack := model.packs[mark] + + if out + int(pack.bytes_unpacked) > out_end { + return out, .Output_Too_Short + } else if inp + int(pack.bytes_packed) > inp_end { + return out, .Stream_Too_Short + } + + code := intrinsics.unaligned_load((^u32)(&input[inp])) + when ODIN_ENDIAN == .Little { + code = intrinsics.byte_swap(code) + } + + // Unpack the leading char + offset := pack.offsets[0] + mask := pack.masks[0] + + last_chr := model.characters_by_id[(code >> offset) & u32(mask)] + output[out] = last_chr + + // Unpack the successor chars + for i := 1; i < int(pack.bytes_unpacked); i += 1 { + offset = pack.offsets[i] + mask = pack.masks[i] + + last_chr = model.successors_reversed[last_chr - model.min_char][(code >> offset) & u32(mask)] + output[out + i] = last_chr + } + + out += int(pack.bytes_unpacked) + inp += int(pack.bytes_packed) + } + } + + return out, nil +} + +decompress_slice_to_string :: proc(input: []u8, model := DEFAULT_MODEL, allocator := context.allocator) -> (res: string, err: compress.Error) { + context.allocator = allocator + + if len(input) == 0 { + return "", .Stream_Too_Short + } + + max_output_size := decompress_bound(len(input), model) + + buf: [dynamic]u8 + if !resize(&buf, max_output_size) { + return "", .Out_Of_Memory + } + + length, result := decompress_slice_to_output_buffer(input, buf[:]) + resize(&buf, length) + return string(buf[:]), result +} +decompress :: proc{decompress_slice_to_output_buffer, decompress_slice_to_string} + +compress_string_to_buffer :: proc(input: string, output: []u8, model := DEFAULT_MODEL, allocator := context.allocator) -> (size: int, err: compress.Error) { + inp, inp_end := 0, len(input) + out, out_end := 0, len(output) + output := output + + validate_model(model) or_return + + indices := make([]i16, model.max_successor_n + 1) + defer delete(indices) + + last_resort := false + + encode: for inp < inp_end { + if last_resort { + last_resort = false + + if input[inp] & 0x80 == 0x80 { + // Non-ASCII case + if out + 2 > out_end { + return out, .Output_Too_Short + } + + // Put in a sentinel byte + output[out] = 0x00 + out += 1 + } else { + // An ASCII byte + if out + 1 > out_end { + return out, .Output_Too_Short + } + } + output[out] = input[inp] + out, inp = out + 1, inp + 1 + } else { + // Find the longest string of known successors + indices[0] = model.ids_by_character[input[inp]] + last_chr_index := indices[0] + + if last_chr_index < 0 { + last_resort = true + continue encode + } + + rest := inp_end - inp + n_consecutive: i8 = 1 + for ; n_consecutive <= model.max_successor_n; n_consecutive += 1 { + if inp_end > 0 && int(n_consecutive) == rest { + break + } + + current_index := model.ids_by_character[input[inp + int(n_consecutive)]] + if current_index < 0 { // '\0' is always -1 + break + } + + successor_index := model.successors_by_bigram[last_chr_index][current_index] + if successor_index < 0 { + break + } + + indices[n_consecutive] = i16(successor_index) + last_chr_index = current_index + } + + if n_consecutive < 2 { + last_resort = true + continue encode + } + + pack_n := find_best_encoding(indices, n_consecutive) + if pack_n >= 0 { + if out + int(model.packs[pack_n].bytes_packed) > out_end { + return out, .Output_Too_Short + } + + pack := model.packs[pack_n] + code := pack.word + + for i := 0; i < int(pack.bytes_unpacked); i += 1 { + code |= u32(indices[i]) << pack.offsets[i] + } + + // In the little-endian world, we need to swap what's in the register to match the memory representation. + when ODIN_ENDIAN == .Little { + code = intrinsics.byte_swap(code) + } + out_ptr := raw_data(output[out:]) + + switch pack.bytes_packed { + case 4: + intrinsics.unaligned_store(transmute(^u32)out_ptr, code) + case 2: + intrinsics.unaligned_store(transmute(^u16)out_ptr, u16(code)) + case 1: + intrinsics.unaligned_store(transmute(^u8)out_ptr, u8(code)) + case: + return out, .Unknown_Compression_Method + } + + out += int(pack.bytes_packed) + inp += int(pack.bytes_unpacked) + } else { + last_resort = true + continue encode + } + } + } + return out, nil +} + +compress_string :: proc(input: string, model := DEFAULT_MODEL, allocator := context.allocator) -> (output: []u8, err: compress.Error) { + context.allocator = allocator + + if len(input) == 0 { + return {}, .Stream_Too_Short + } + + max_output_size := compress_bound(len(input)) + + buf: [dynamic]u8 + if !resize(&buf, max_output_size) { + return {}, .Out_Of_Memory + } + + length, result := compress_string_to_buffer(input, buf[:]) + resize(&buf, length) + return buf[:length], result +} +compress :: proc{compress_string_to_buffer, compress_string} \ No newline at end of file diff --git a/examples/all/all_main.odin b/examples/all/all_main.odin index 4f5bfbdc1..27f199062 100644 --- a/examples/all/all_main.odin +++ b/examples/all/all_main.odin @@ -10,6 +10,7 @@ import c "core:c" import libc "core:c/libc" import compress "core:compress" +import shoco "core:compress/shoco" import gzip "core:compress/gzip" import zlib "core:compress/zlib" @@ -115,6 +116,7 @@ _ :: bytes _ :: c _ :: libc _ :: compress +_ :: shoco _ :: gzip _ :: zlib _ :: bit_array diff --git a/tests/core/assets/Shoco/LICENSE b/tests/core/assets/Shoco/LICENSE new file mode 100644 index 000000000..9ca94bcdf --- /dev/null +++ b/tests/core/assets/Shoco/LICENSE @@ -0,0 +1,26 @@ +Copyright (c) 2016-2021 Ginger Bill. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/tests/core/assets/Shoco/LICENSE.shoco b/tests/core/assets/Shoco/LICENSE.shoco new file mode 100644 index 0000000000000000000000000000000000000000..5d5e4d6236019bf208cc2ebb7a81ed06469b78a8 GIT binary patch literal 1269 zcmZ?HFQ}ZGo>8Kpk*uj;WMF8fYh++#sNjBtvq-@ybE%$!<5C5Xe6hkgiK4P5Jua@G z1Dz$0`)3v^w4^AkPF3hoxX7>;#7^pHuT)6u%PrPXxDlYxr*I>nr-WM6i1C|2kPo4sa- z!fi=~b0!@iO^N%;QWe%f-F#Mit&YM8K`sRag-e|Z{V=myQa~yp#_xRMW0s)D#btzS zZifQQu^^M@6wJH(5u^jdY>>ex1o4}#0Cv`{r{@?8GkZZAT2l1;6plL-Dctg!o0wA! z@$roSkOg-?@7#bvCg;Fo%&p$H_Hm$*ftAH;z9YWigcg}gqHNM7O=P#|;_gUr3g zJ@?+pqGE-ul?r(YOC^-B3g#>vzg@*VC1uG~y1}k_5 zD{+A&9Q`5{Tq6R4T!VuZ{DTxceFJUI-tSznemE zs8eu=XGmy>tAe|~zY8d!xq@AT!aSW_gRK>O{6RtQrVtwJs-xiI7~%-FI3UR1%`+s} z8srG4&|pt+pn3X*xCRA<284L}`)Mk8_=mfOxdw4LJB9|kx`0F1-%kM)p&=fw{y~wT zC;){4IPi58!aZC=JY0i70SXQ&M^In{hXi>#gW?k!M><^oK?))MK_N(fRPb|k_wjUh z^>cOwxyB!4O1NjRtEPfukY})`pF7A%khS5CkqZ8yAs~A}DKRwImCN5v0VT!ifMZg@ i(@nwACCn4#Y_JMfg#iEHU{7c|0SB40hXN$H^|%1|ER=2l literal 0 HcmV?d00001 diff --git a/tests/core/assets/Shoco/README.md b/tests/core/assets/Shoco/README.md new file mode 100644 index 000000000..9e46f80d0 --- /dev/null +++ b/tests/core/assets/Shoco/README.md @@ -0,0 +1,95 @@ +

+ Odin logo +
+ The Data-Oriented Language for Sane Software Development. +
+
+ + + + + + +
+ + + + + + +

+ +# The Odin Programming Language + + +Odin is a general-purpose programming language with distinct typing, built for high performance, modern systems, and built-in data-oriented data types. The Odin Programming Language, the C alternative for the joy of programming. + +Website: [https://odin-lang.org/](https://odin-lang.org/) + +```odin +package main + +import "core:fmt" + +main :: proc() { + program := "+ + * 😃 - /" + accumulator := 0 + + for token in program { + switch token { + case '+': accumulator += 1 + case '-': accumulator -= 1 + case '*': accumulator *= 2 + case '/': accumulator /= 2 + case '😃': accumulator *= accumulator + case: // Ignore everything else + } + } + + fmt.printf("The program \"%s\" calculates the value %d\n", + program, accumulator) +} + +``` + +## Documentation + +#### [Getting Started](https://odin-lang.org/docs/install) + +Instructions for downloading and installing the Odin compiler and libraries. + +#### [Nightly Builds](https://odin-lang.org/docs/nightly/) + +Get the latest nightly builds of Odin. + +### Learning Odin + +#### [Overview of Odin](https://odin-lang.org/docs/overview) + +An overview of the Odin programming language. + +#### [Frequently Asked Questions (FAQ)](https://odin-lang.org/docs/faq) + +Answers to common questions about Odin. + +#### [Packages](https://pkg.odin-lang.org/) + +Documentation for all the official packages part of the [core](https://pkg.odin-lang.org/core/) and [vendor](https://pkg.odin-lang.org/vendor/) library collections. + +#### [The Odin Wiki](https://github.com/odin-lang/Odin/wiki) + +A wiki maintained by the Odin community. + +#### [Odin Discord](https://discord.gg/sVBPHEv) + +Get live support and talk with other odiners on the Odin Discord. + +### Articles + +#### [The Odin Blog](https://odin-lang.org/news/) + +The official blog of the Odin programming language, featuring announcements, news, and in-depth articles by the Odin team and guests. + +## Warnings + +* The Odin compiler is still in development. diff --git a/tests/core/assets/Shoco/README.md.shoco b/tests/core/assets/Shoco/README.md.shoco new file mode 100644 index 0000000000000000000000000000000000000000..013f4f46928fb3d5c6ea62d76ddda91f29c6e420 GIT binary patch literal 2227 zcmcC1P)J;so@c9c&AUO#j!Qv7LBXawU7@%r*;Z+Lak73+etN!c@v>aKf|*JRi8&>< zO8z_Z6hNX%3JWWj*eY#InP6pRstVJaRHP3!C1i?%OKYO8|J(ynDGEL<>7|M3sS0U* z3c)R@3c>v)HzX8XQp-|v@(XTp>p^UV=~S>muo4wA=A_vwWt5Z@6kF-*ryprc(p!_O zpT9Ft_i`hM(tq64;;etUYa!Hr6!*ilb*Jks$~>5&mzfVXLI0daTPjF1)C|4ivUI3+ z8~sGE>lJLMZgWy%N_wh(K~8I0Uv9B(_z|Y^VpSu98dW0$pN`TBD06GFf3R-)IZLcg zN-Dzcww=Y-#P!nCK|Z?N2=xe|Z~*DiH#M^`G`6rbHMB4`Gc-3cF*39VMXW7I5^4@9 z0g`xKCa)N(53BuHqOZI!JMD70zO$!3IDA0nQBc%qan-%xUQv3Iz=>8Y4w@6t?Xs%2Oz=T$q|$tfSD90#>8j zp>XGn?!yTuvlQ-}0cmd5!{H1ag$bz&&I*Y+lZx_MGt0o~enP53R(_>IKg=%fB5T%4TOE-0vLt6M2Rb!pow7(zvL5u&;X zQ7we1maT#jRINTjR39M_4{ov}E3c)Akny|&ruH<6$;pjshf~G%FOemq2_FeN6zDIoAaAH9;{FU3S2!2$v#|oW7>Q zviq|u71liI@F^^YI}=(Sg=c1GB6Opbcm6x`^fzW_f;_IUF*{S?SvT{HCWWL*kdr}y z`?SetUM1W_kcdm?HF1P-pkf(R&KHL{1$elYK@-8UvQ&kOfBQ;6t}jW<$yT@#pm5xw zNFo1@Q&F)(FU)X|Q4q5g9M8{NQVdHJP=`C^5#w# zxeqlt=qSAC;%7-$c+j$7>MibK9feutkkYe5cSpelg+rf{mlPv;cT!?5$j#}c%_Wd< fR0wZBd$5>`OAA)4g0o4d!iCrzg${)s@YVtV8=oH9 literal 0 HcmV?d00001 diff --git a/tests/core/compress/test_core_compress.odin b/tests/core/compress/test_core_compress.odin index 51952a568..ee7233e52 100644 --- a/tests/core/compress/test_core_compress.odin +++ b/tests/core/compress/test_core_compress.odin @@ -7,13 +7,14 @@ package test_core_compress List of contributors: Jeroen van Rijn: Initial implementation. - A test suite for ZLIB, GZIP. + A test suite for ZLIB, GZIP and Shoco. */ import "core:testing" import "core:compress/zlib" import "core:compress/gzip" +import "core:compress/shoco" import "core:bytes" import "core:fmt" @@ -48,6 +49,7 @@ main :: proc() { t := testing.T{w=w} zlib_test(&t) gzip_test(&t) + shoco_test(&t) fmt.printf("%v/%v tests successful.\n", TEST_count - TEST_fail, TEST_count) if TEST_fail > 0 { @@ -134,3 +136,56 @@ gzip_test :: proc(t: ^testing.T) { expect(t, false, error) } } + +@test +shoco_test :: proc(t: ^testing.T) { + + Shoco_Tests :: []struct{ + compressed: []u8, + raw: []u8, + short_pack: int, + short_sentinel: int, + }{ + { #load("../assets/Shoco/README.md.shoco"), #load("../assets/Shoco/README.md"), 10, 1006 }, + { #load("../assets/Shoco/LICENSE.shoco"), #load("../assets/Shoco/LICENSE"), 25, 68 }, + } + + for v in Shoco_Tests { + expected_raw := len(v.raw) + expected_compressed := len(v.compressed) + + biggest_unpacked := shoco.decompress_bound(expected_compressed) + biggest_packed := shoco.compress_bound(expected_raw) + + buffer := make([]u8, max(biggest_packed, biggest_unpacked)) + defer delete(buffer) + + size, err := shoco.decompress(v.compressed, buffer[:]) + msg := fmt.tprintf("Expected `decompress` to return `nil`, got %v", err) + expect(t, err == nil, msg) + + msg = fmt.tprintf("Decompressed %v bytes into %v. Expected to decompress into %v bytes.", len(v.compressed), size, expected_raw) + expect(t, size == expected_raw, msg) + expect(t, string(buffer[:size]) == string(v.raw), "Decompressed contents don't match.") + + size, err = shoco.compress(string(v.raw), buffer[:]) + expect(t, err == nil, "Expected `compress` to return `nil`.") + + msg = fmt.tprintf("Compressed %v bytes into %v. Expected to compress into %v bytes.", expected_raw, size, expected_compressed) + expect(t, size == expected_compressed, msg) + + size, err = shoco.decompress(v.compressed, buffer[:expected_raw - 10]) + msg = fmt.tprintf("Decompressing into too small a buffer returned %v, expected `.Output_Too_Short`", err) + expect(t, err == .Output_Too_Short, msg) + + size, err = shoco.compress(string(v.raw), buffer[:expected_compressed - 10]) + msg = fmt.tprintf("Compressing into too small a buffer returned %v, expected `.Output_Too_Short`", err) + expect(t, err == .Output_Too_Short, msg) + + size, err = shoco.decompress(v.compressed[:v.short_pack], buffer[:]) + expect(t, err == .Stream_Too_Short, "Expected `decompress` to return `Stream_Too_Short` because there was no more data after selecting a pack.") + + size, err = shoco.decompress(v.compressed[:v.short_sentinel], buffer[:]) + expect(t, err == .Stream_Too_Short, "Expected `decompress` to return `Stream_Too_Short` because there was no more data after non-ASCII sentinel.") + } +} \ No newline at end of file