mirror of
https://github.com/odin-lang/Odin.git
synced 2026-04-18 20:40:28 +00:00
Merge branch 'master' into zlib_optimize
This commit is contained in:
340
core/bufio/scanner.odin
Normal file
340
core/bufio/scanner.odin
Normal file
@@ -0,0 +1,340 @@
|
||||
package bufio
|
||||
|
||||
import "core:bytes"
|
||||
import "core:io"
|
||||
import "core:mem"
|
||||
import "core:unicode/utf8"
|
||||
import "intrinsics"
|
||||
|
||||
// Extra errors returned by the scanning procedures, in addition to `io.Error`.
Scanner_Extra_Error :: enum i32 {
	// The split procedure returned a negative advance count.
	Negative_Advance,
	// The split procedure advanced past the end of the data it was given.
	Advanced_Too_Far,
	// The reader reported a byte count that is negative or larger than the
	// space that was offered to it.
	Bad_Read_Count,
	// The buffer is full and may not grow: it already holds `max_token_size`
	// bytes, or doubling its size would overflow.
	Too_Long,
	// Reads kept returning no data after an attempt to grow the buffer left
	// its length unchanged.
	Too_Short,
}
|
||||
|
||||
// Scanner_Error is the error type used by the scanner: either an error coming
// from the underlying reader (`io.Error`) or one of the scanner-specific
// `Scanner_Extra_Error` values. A nil value means no error.
Scanner_Error :: union {
	io.Error,
	Scanner_Extra_Error,
}
|
||||
|
||||
// Split_Proc is the signature of the split procedure used to tokenize the input.
//
// Inputs:
//   data:   the unprocessed bytes currently held in the scanner's buffer
//   at_eof: true once the scanner has recorded an error (this includes EOF
//           from the reader), i.e. no further data will be read
//
// Results, as interpreted by scanner_scan:
//   advance:     number of bytes of `data` to consume; it must lie in
//                0..=len(data), otherwise scanning stops with
//                .Negative_Advance or .Advanced_Too_Far
//   token:       the token to report; a nil token asks the scanner to read
//                more data and call the split procedure again
//   err:         a non-nil error stops the scan
//   final_token: when true, `token` is delivered and scanning ends; this
//                flag is checked before `err`
Split_Proc :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool);
|
||||
|
||||
// Scanner reads data from an io.Reader into a buffer and hands it to a split
// procedure that cuts it into tokens (see scanner_scan).
Scanner :: struct {
	r:     io.Reader,  // source of the data
	split: Split_Proc, // procedure used to cut the buffered data into tokens

	buf:            [dynamic]byte, // buffer that data is read into
	max_token_size: int,           // upper bound on len(buf); values <= 0 are replaced by DEFAULT_MAX_SCAN_TOKEN_SIZE when the buffer must grow
	start:          int,           // buf[start:end] is the data not yet consumed
	end:            int,
	token:          []byte,        // token produced by the most recent split call

	_err:                         Scanner_Error, // first error recorded (EOF may be replaced by a later error)
	max_consecutive_empty_reads:  int,           // limit on consecutive empty reads/tokens; values <= 0 are replaced by DEFAULT_MAX_CONSECUTIVE_EMPTY_READS on use
	successive_empty_token_count: int,           // tokens returned in a row without advancing after an error was recorded
	scan_called:                  bool,          // set once scanner_scan has been called and the scanner was not already done
	done:                         bool,          // set when the split procedure reported its final token
}
|
||||
|
||||
// Default value of Scanner.max_token_size: the buffer is not grown beyond
// this many bytes (64 KiB).
DEFAULT_MAX_SCAN_TOKEN_SIZE :: 1<<16;

// Size, in bytes, requested for the buffer the first time scanner_scan has to
// grow an empty buffer.
@(private)
_INIT_BUF_SIZE :: 4096;
|
||||
|
||||
// scanner_init sets up `s` to read from `r`.
//
// It selects `scan_lines` as the split procedure, sets the maximum token size
// to DEFAULT_MAX_SCAN_TOKEN_SIZE and makes `buf_allocator` the allocator of the
// internal buffer. No other field of `s` is touched.
//
// Returns `s`.
scanner_init :: proc(s: ^Scanner, r: io.Reader, buf_allocator := context.allocator) -> ^Scanner {
	s.buf.allocator  = buf_allocator;
	s.max_token_size = DEFAULT_MAX_SCAN_TOKEN_SIZE;
	s.split          = scan_lines;
	s.r              = r;
	return s;
}
|
||||
// scanner_init_with_buffer sets up `s` to read from `r`, using the
// caller-provided slice `buf` as the scanner's buffer.
//
// It selects `scan_lines` as the split procedure and sets the maximum token
// size to DEFAULT_MAX_SCAN_TOKEN_SIZE. The buffer is built from `buf` with
// mem.buffer_from_slice and then resized to its full capacity.
//
// Returns `s`.
scanner_init_with_buffer :: proc(s: ^Scanner, r: io.Reader, buf: []byte) -> ^Scanner {
	s.max_token_size = DEFAULT_MAX_SCAN_TOKEN_SIZE;
	s.split          = scan_lines;
	s.r              = r;

	// Wrap the slice, then expose all of its capacity as usable length.
	s.buf = mem.buffer_from_slice(buf);
	full_capacity := cap(s.buf);
	resize(&s.buf, full_capacity);

	return s;
}
|
||||
// scanner_destroy releases the scanner's buffer by calling `delete` on it.
scanner_destroy :: proc(s: ^Scanner) {
	buffer := s.buf;
	delete(buffer);
}
|
||||
|
||||
|
||||
// scanner_error reports the first non-EOF error that was encountered by the
// scanner. A stored `.EOF` or `.None` is reported as nil.
scanner_error :: proc(s: ^Scanner) -> Scanner_Error {
	if s._err == .EOF || s._err == .None {
		return nil;
	}
	return s._err;
}
|
||||
|
||||
// scanner_bytes gives back the token produced by the latest call to
// scanner_scan, as a byte slice.
// The memory behind the slice may be overwritten by a later call to
// scanner_scan, so the result must be treated as immutable and copied if it
// has to outlive the next scan.
scanner_bytes :: proc(s: ^Scanner) -> []byte {
	current := s.token;
	return current;
}
|
||||
|
||||
// scanner_text gives back the token produced by the latest call to
// scanner_scan, converted to a string.
// The memory behind the token may be overwritten by a later call to
// scanner_scan, so the result must be treated as immutable and copied if it
// has to outlive the next scan.
scanner_text :: proc(s: ^Scanner) -> string {
	current := s.token;
	return string(current);
}
|
||||
|
||||
// scanner_scan advances the scanner to the next token, which is then
// available through scanner_bytes or scanner_text.
//
// Returns true when a token was produced. Returns false when scanning has
// stopped, either because the input is exhausted or because an error was
// recorded; scanner_error tells the two apart.
scanner_scan :: proc(s: ^Scanner) -> bool {
	// Stores `err` unless a non-EOF error has already been recorded.
	// `.None` is normalised to nil first.
	record_error :: proc(s: ^Scanner, err: Scanner_Error) {
		to_store := err;
		if to_store == .None {
			to_store = nil;
		}
		if s._err == nil || s._err == .EOF {
			s._err = to_store;
		}
	}

	if s.done {
		return false;
	}
	s.scan_called = true;

	for {
		// Try to cut a token out of what is buffered. The split procedure is
		// also called once an error is recorded so that it can flush or recover.
		have_error := s._err != nil;
		if s.start < s.end || have_error {
			advance, token, split_err, final_token := s.split(s.buf[s.start:s.end], have_error);
			if final_token {
				s.token = token;
				s.done = true;
				return true;
			}
			if split_err != nil {
				record_error(s, split_err);
				return false;
			}

			// Validate and apply the advance.
			unread := s.end - s.start;
			if advance < 0 {
				record_error(s, .Negative_Advance);
				return false;
			}
			if advance > unread {
				record_error(s, .Advanced_Too_Far);
				return false;
			}
			s.start += advance;

			s.token = token;
			if s.token != nil {
				if s._err != nil && advance <= 0 {
					// A token was returned without consuming input after an
					// error was recorded: guard against looping forever.
					s.successive_empty_token_count += 1;

					if s.max_consecutive_empty_reads <= 0 {
						s.max_consecutive_empty_reads = DEFAULT_MAX_CONSECUTIVE_EMPTY_READS;
					}
					if s.successive_empty_token_count > s.max_consecutive_empty_reads {
						record_error(s, .No_Progress);
						return false;
					}
				} else {
					s.successive_empty_token_count = 0;
				}
				return true;
			}
		}

		// With an error recorded and no token produced, scanning is over.
		if s._err != nil {
			s.start = 0;
			s.end = 0;
			return false;
		}

		// More data is needed. First move the unread data to the front of the
		// buffer when that frees a worthwhile amount of space.
		if s.start > 0 && (s.end == len(s.buf) || s.start > len(s.buf)/2) {
			copy(s.buf[:], s.buf[s.start:s.end]);
			s.end -= s.start;
			s.start = 0;
		}

		could_be_too_short := false;

		// Grow the buffer if it is full.
		if s.end == len(s.buf) {
			size_before := len(s.buf);

			if s.max_token_size <= 0 {
				s.max_token_size = DEFAULT_MAX_SCAN_TOKEN_SIZE;
			}
			if size_before >= s.max_token_size {
				record_error(s, .Too_Long);
				return false;
			}

			// Double the size (or start at the initial size), checking for overflow.
			wanted_size := _INIT_BUF_SIZE;
			if size_before > 0 {
				doubled, overflowed := intrinsics.overflow_mul(size_before, 2);
				if overflowed {
					record_error(s, .Too_Long);
					return false;
				}
				wanted_size = doubled;
			}
			wanted_size = min(wanted_size, s.max_token_size);

			resize(&s.buf, wanted_size);
			s.end -= s.start;
			s.start = 0;

			// If the buffer did not actually grow, a later lack of progress is
			// reported as .Too_Short rather than .No_Progress.
			could_be_too_short = size_before >= len(s.buf);
		}

		// Read data into the free part of the buffer.
		empty_reads := 0;
		for {
			n, read_err := io.read(s.r, s.buf[s.end:len(s.buf)]);
			room := len(s.buf) - s.end;
			if n < 0 || room < n {
				record_error(s, .Bad_Read_Count);
				break;
			}
			s.end += n;
			if read_err != nil {
				record_error(s, read_err);
				break;
			}
			if n > 0 {
				s.successive_empty_token_count = 0;
				break;
			}
			empty_reads += 1;

			if s.max_consecutive_empty_reads <= 0 {
				s.max_consecutive_empty_reads = DEFAULT_MAX_CONSECUTIVE_EMPTY_READS;
			}
			if empty_reads > s.max_consecutive_empty_reads {
				if could_be_too_short {
					record_error(s, .Too_Short);
				} else {
					record_error(s, .No_Progress);
				}
				break;
			}
		}
	}
}
|
||||
|
||||
// scan_bytes is a split procedure that returns each byte of the input as a
// token.
//
// When `data` is empty it returns the zero results (advance 0, nil token, no
// error): at EOF that ends the scan, otherwise it asks the caller for more
// data. Checking for empty input regardless of `at_eof` keeps `data[0:1]` from
// being evaluated on an empty slice when the procedure is called directly.
scan_bytes :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) {
	if len(data) == 0 {
		return;
	}
	return 1, data[0:1], nil, false;
}
|
||||
|
||||
// scan_runes is a split procedure that returns each UTF-8 encoded rune of the
// input as a token.
//
// A byte sequence that does not decode to a rune wider than one byte is
// consumed one byte at a time and reported as the three bytes 0xef 0xbf 0xbd
// (the UTF-8 encoding of U+FFFD). If the data might just be an incomplete
// rune and more input may follow, the zero results are returned to request
// more data.
//
// When `data` is empty the zero results are returned as well. Checking for
// empty input regardless of `at_eof` keeps `data[0]` from being evaluated on
// an empty slice when the procedure is called directly.
scan_runes :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) {
	if len(data) == 0 {
		return;
	}

	// Fast path: a byte below RUNE_SELF is a complete single-byte rune.
	if data[0] < utf8.RUNE_SELF {
		advance = 1;
		token = data[0:1];
		return;
	}

	// Multi-byte rune that decoded successfully.
	_, width := utf8.decode_rune(data);
	if width > 1 {
		advance = width;
		token = data[0:width];
		return;
	}

	// Decoding failed. If the rune may simply be incomplete, wait for more data.
	if !at_eof && !utf8.full_rune(data) {
		return;
	}

	@thread_local ERROR_RUNE := []byte{0xef, 0xbf, 0xbd};

	// Genuine encoding error: skip one byte and report the replacement bytes.
	advance = 1;
	token = ERROR_RUNE;
	return;
}
|
||||
|
||||
// scan_words is a split procedure that returns each space-separated word of
// the input as a token, with the surrounding spaces removed.
//
// Leading spaces are skipped. If a space follows the word, the word is
// returned and the space is consumed with it. If no space follows and
// `at_eof` is true, the remaining non-empty data is returned as the last word.
// Otherwise only the leading spaces are consumed and a nil token requests
// more data.
scan_words :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) {
	// Reports whether `r` is one of the runes treated as a space here.
	is_space :: proc "contextless" (r: rune) -> bool {
		if '\u2000' <= r && r <= '\u200a' {
			return true;
		}
		switch r {
		case ' ', '\t', '\n', '\v', '\f', '\r',
		     '\u0085', '\u00a0',
		     '\u1680', '\u2028', '\u2029', '\u202f', '\u205f', '\u3000':
			return true;
		}
		return false;
	}

	// Skip spaces at the beginning; `start` ends up at the first non-space rune.
	start := 0;
	for start < len(data) {
		r, width := utf8.decode_rune(data[start:]);
		if !is_space(r) {
			break;
		}
		start += width;
	}

	// Look for the space that terminates the word.
	pos := start;
	for pos < len(data) {
		r, width := utf8.decode_rune(data[pos:]);
		if is_space(r) {
			return pos+width, data[start:pos], nil, false;
		}
		pos += width;
	}

	// No terminating space: at EOF whatever is left is the final word.
	if at_eof && len(data) > start {
		return len(data), data[start:], nil, false;
	}

	// Otherwise consume only the leading spaces and ask for more data.
	return start, nil, nil, false;
}
|
||||
|
||||
// scan_lines is a split procedure that returns each line of the input as a
// token, without its line ending.
//
// A line ends at '\n'; a single '\r' directly before it is removed as well.
// At EOF the remaining data is returned as the last line even without a
// newline (again with one trailing '\r' removed). Empty input at EOF yields no
// token. When no newline is found and `at_eof` is false, the zero results
// request more data.
scan_lines :: proc(data: []byte, at_eof: bool) -> (advance: int, token: []byte, err: Scanner_Error, final_token: bool) {
	// Removes one trailing '\r' from `line`, if present.
	drop_cr :: proc "contextless" (line: []byte) -> []byte {
		n := len(line);
		if n == 0 || line[n-1] != '\r' {
			return line;
		}
		return line[0:n-1];
	}

	if at_eof && len(data) == 0 {
		return 0, nil, nil, false;
	}

	newline := bytes.index_byte(data, '\n');
	if newline >= 0 {
		// A complete, newline-terminated line is available.
		return newline+1, drop_cr(data[0:newline]), nil, false;
	}

	if at_eof {
		// Final line without a terminating newline.
		return len(data), drop_cr(data), nil, false;
	}

	// No full line yet: request more data.
	return 0, nil, nil, false;
}
|
||||
@@ -6557,10 +6557,54 @@ ExprKind check_expr_base_internal(CheckerContext *c, Operand *o, Ast *node, Type
|
||||
break; // NOTE(bill): No need to init
|
||||
}
|
||||
if (t->Struct.is_raw_union) {
|
||||
if (cl->elems.count != 0) {
|
||||
gbString type_str = type_to_string(type);
|
||||
error(node, "Illegal compound literal type '%s'", type_str);
|
||||
gb_string_free(type_str);
|
||||
if (cl->elems.count > 0) {
|
||||
// NOTE: unions cannot be constant
|
||||
is_constant = false;
|
||||
|
||||
if (cl->elems[0]->kind != Ast_FieldValue) {
|
||||
gbString type_str = type_to_string(type);
|
||||
error(node, "%s ('struct #raw_union') compound literals are only allowed to contain 'field = value' elements", type_str);
|
||||
gb_string_free(type_str);
|
||||
} else {
|
||||
if (cl->elems.count != 1) {
|
||||
gbString type_str = type_to_string(type);
|
||||
error(node, "%s ('struct #raw_union') compound literals are only allowed to contain up to 1 'field = value' element, got %td", type_str, cl->elems.count);
|
||||
gb_string_free(type_str);
|
||||
} else {
|
||||
Ast *elem = cl->elems[0];
|
||||
ast_node(fv, FieldValue, elem);
|
||||
if (fv->field->kind != Ast_Ident) {
|
||||
gbString expr_str = expr_to_string(fv->field);
|
||||
error(elem, "Invalid field name '%s' in structure literal", expr_str);
|
||||
gb_string_free(expr_str);
|
||||
break;
|
||||
}
|
||||
|
||||
String name = fv->field->Ident.token.string;
|
||||
|
||||
Selection sel = lookup_field(type, name, o->mode == Addressing_Type);
|
||||
bool is_unknown = sel.entity == nullptr;
|
||||
if (is_unknown) {
|
||||
error(elem, "Unknown field '%.*s' in structure literal", LIT(name));
|
||||
break;
|
||||
}
|
||||
|
||||
if (sel.index.count > 1) {
|
||||
error(elem, "Cannot assign to an anonymous field '%.*s' in a structure literal (at the moment)", LIT(name));
|
||||
break;
|
||||
}
|
||||
|
||||
Entity *field = t->Struct.fields[sel.index[0]];
|
||||
add_entity_use(c, fv->field, field);
|
||||
|
||||
Operand o = {};
|
||||
check_expr_or_type(c, &o, fv->value, field->type);
|
||||
|
||||
|
||||
check_assignment(c, &o, field->type, str_lit("structure literal"));
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -3601,7 +3601,7 @@ void lb_mem_zero_ptr_internal(lbProcedure *p, LLVMValueRef ptr, LLVMValueRef len
|
||||
lb_type(p->module, t_int)
|
||||
};
|
||||
unsigned id = LLVMLookupIntrinsicID(name, gb_strlen(name));
|
||||
GB_ASSERT_MSG(id != 0, "Unable to find %s.%s.%s.%s", name, LLVMPrintTypeToString(types[0]), LLVMPrintTypeToString(types[1]), LLVMPrintTypeToString(types[2]));
|
||||
GB_ASSERT_MSG(id != 0, "Unable to find %s.%s.%s", name, LLVMPrintTypeToString(types[0]), LLVMPrintTypeToString(types[1]));
|
||||
LLVMValueRef ip = LLVMGetIntrinsicDeclaration(p->module->mod, id, types, gb_count_of(types));
|
||||
|
||||
LLVMValueRef args[4] = {};
|
||||
@@ -6839,6 +6839,10 @@ lbValue lb_const_value(lbModule *m, Type *type, ExactValue value, bool allow_loc
|
||||
return lb_const_nil(m, original_type);
|
||||
}
|
||||
|
||||
if (is_type_raw_union(type)) {
|
||||
return lb_const_nil(m, original_type);
|
||||
}
|
||||
|
||||
isize offset = 0;
|
||||
if (type->Struct.custom_align > 0) {
|
||||
offset = 1;
|
||||
@@ -11141,26 +11145,27 @@ lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind, lbValue x) {
|
||||
return res;
|
||||
}
|
||||
} else if (is_type_slice(t)) {
|
||||
lbValue len = lb_emit_struct_ev(p, x, 1);
|
||||
lbValue data = lb_emit_struct_ev(p, x, 0);
|
||||
if (op_kind == Token_CmpEq) {
|
||||
res.value = LLVMBuildIsNull(p->builder, len.value, "");
|
||||
res.value = LLVMBuildIsNull(p->builder, data.value, "");
|
||||
return res;
|
||||
} else if (op_kind == Token_NotEq) {
|
||||
res.value = LLVMBuildIsNotNull(p->builder, len.value, "");
|
||||
res.value = LLVMBuildIsNotNull(p->builder, data.value, "");
|
||||
return res;
|
||||
}
|
||||
} else if (is_type_dynamic_array(t)) {
|
||||
lbValue cap = lb_emit_struct_ev(p, x, 2);
|
||||
lbValue data = lb_emit_struct_ev(p, x, 0);
|
||||
if (op_kind == Token_CmpEq) {
|
||||
res.value = LLVMBuildIsNull(p->builder, cap.value, "");
|
||||
res.value = LLVMBuildIsNull(p->builder, data.value, "");
|
||||
return res;
|
||||
} else if (op_kind == Token_NotEq) {
|
||||
res.value = LLVMBuildIsNotNull(p->builder, cap.value, "");
|
||||
res.value = LLVMBuildIsNotNull(p->builder, data.value, "");
|
||||
return res;
|
||||
}
|
||||
} else if (is_type_map(t)) {
|
||||
lbValue cap = lb_map_cap(p, x);
|
||||
return lb_emit_comp(p, op_kind, cap, lb_zero(p->module, cap.type));
|
||||
lbValue hashes = lb_emit_struct_ev(p, x, 0);
|
||||
lbValue data = lb_emit_struct_ev(p, hashes, 0);
|
||||
return lb_emit_comp(p, op_kind, data, lb_zero(p->module, data.type));
|
||||
} else if (is_type_union(t)) {
|
||||
if (type_size_of(t) == 0) {
|
||||
if (op_kind == Token_CmpEq) {
|
||||
@@ -11181,21 +11186,35 @@ lbValue lb_emit_comp_against_nil(lbProcedure *p, TokenKind op_kind, lbValue x) {
|
||||
} else if (is_type_soa_struct(t)) {
|
||||
Type *bt = base_type(t);
|
||||
if (bt->Struct.soa_kind == StructSoa_Slice) {
|
||||
lbValue len = lb_soa_struct_len(p, x);
|
||||
LLVMValueRef the_value = {};
|
||||
if (bt->Struct.fields.count == 0) {
|
||||
lbValue len = lb_soa_struct_len(p, x);
|
||||
the_value = len.value;
|
||||
} else {
|
||||
lbValue first_field = lb_emit_struct_ev(p, x, 0);
|
||||
the_value = first_field.value;
|
||||
}
|
||||
if (op_kind == Token_CmpEq) {
|
||||
res.value = LLVMBuildIsNull(p->builder, len.value, "");
|
||||
res.value = LLVMBuildIsNull(p->builder, the_value, "");
|
||||
return res;
|
||||
} else if (op_kind == Token_NotEq) {
|
||||
res.value = LLVMBuildIsNotNull(p->builder, len.value, "");
|
||||
res.value = LLVMBuildIsNotNull(p->builder, the_value, "");
|
||||
return res;
|
||||
}
|
||||
} else if (bt->Struct.soa_kind == StructSoa_Dynamic) {
|
||||
lbValue cap = lb_soa_struct_cap(p, x);
|
||||
LLVMValueRef the_value = {};
|
||||
if (bt->Struct.fields.count == 0) {
|
||||
lbValue cap = lb_soa_struct_cap(p, x);
|
||||
the_value = cap.value;
|
||||
} else {
|
||||
lbValue first_field = lb_emit_struct_ev(p, x, 0);
|
||||
the_value = first_field.value;
|
||||
}
|
||||
if (op_kind == Token_CmpEq) {
|
||||
res.value = LLVMBuildIsNull(p->builder, cap.value, "");
|
||||
res.value = LLVMBuildIsNull(p->builder, the_value, "");
|
||||
return res;
|
||||
} else if (op_kind == Token_NotEq) {
|
||||
res.value = LLVMBuildIsNotNull(p->builder, cap.value, "");
|
||||
res.value = LLVMBuildIsNotNull(p->builder, the_value, "");
|
||||
return res;
|
||||
}
|
||||
}
|
||||
@@ -13449,6 +13468,8 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) {
|
||||
TypeStruct *st = &bt->Struct;
|
||||
if (cl->elems.count > 0) {
|
||||
lb_addr_store(p, v, lb_const_value(p->module, type, exact_value_compound(expr)));
|
||||
lbValue comp_lit_ptr = lb_addr_get_ptr(p, v);
|
||||
|
||||
for_array(field_index, cl->elems) {
|
||||
Ast *elem = cl->elems[field_index];
|
||||
|
||||
@@ -13477,6 +13498,12 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) {
|
||||
|
||||
field_expr = lb_build_expr(p, elem);
|
||||
|
||||
lbValue gep = {};
|
||||
if (is_raw_union) {
|
||||
gep = lb_emit_conv(p, comp_lit_ptr, alloc_type_pointer(ft));
|
||||
} else {
|
||||
gep = lb_emit_struct_ep(p, comp_lit_ptr, cast(i32)index);
|
||||
}
|
||||
|
||||
Type *fet = field_expr.type;
|
||||
GB_ASSERT(fet->kind != Type_Tuple);
|
||||
@@ -13485,11 +13512,9 @@ lbAddr lb_build_addr(lbProcedure *p, Ast *expr) {
|
||||
if (is_type_union(ft) && !are_types_identical(fet, ft) && !is_type_untyped(fet)) {
|
||||
GB_ASSERT_MSG(union_variant_index(ft, fet) > 0, "%s", type_to_string(fet));
|
||||
|
||||
lbValue gep = lb_emit_struct_ep(p, lb_addr_get_ptr(p, v), cast(i32)index);
|
||||
lb_emit_store_union_variant(p, gep, field_expr, fet);
|
||||
} else {
|
||||
lbValue fv = lb_emit_conv(p, field_expr, ft);
|
||||
lbValue gep = lb_emit_struct_ep(p, lb_addr_get_ptr(p, v), cast(i32)index);
|
||||
lb_emit_store(p, gep, fv);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user